author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 18:49:45 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 18:49:45 +0000
commit    2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch)
tree      848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/net/ethernet/intel/ice
parent    Initial commit. (diff)
Adding upstream version 6.1.76. (tag: upstream/6.1.76)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/net/ethernet/intel/ice')
-rw-r--r--  drivers/net/ethernet/intel/ice/Makefile  49
-rw-r--r--  drivers/net/ethernet/intel/ice/ice.h  944
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_adminq_cmd.h  2363
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_arfs.c  656
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_arfs.h  85
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_base.c  1019
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_base.h  33
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_cgu_regs.h  116
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_common.c  5514
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_common.h  232
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_controlq.c  1242
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_controlq.h  100
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_dcb.c  1617
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_dcb.h  198
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_dcb_lib.c  1048
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_dcb_lib.h  138
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_dcb_nl.c  1096
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_dcb_nl.h  20
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_devids.h  67
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_devlink.c  1366
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_devlink.h  21
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_eswitch.c  717
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_eswitch.h  86
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ethtool.c  4285
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c  1939
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_fdir.c  1302
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_fdir.h  223
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_flex_pipe.c  6129
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_flex_pipe.h  135
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_flex_type.h  709
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_flow.c  2474
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_flow.h  416
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_fltr.c  476
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_fltr.h  51
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_fw_update.c  1051
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_fw_update.h  13
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_gnss.c  572
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_gnss.h  74
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_hw_autogen.h  502
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_idc.c  342
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_idc_int.h  13
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lag.c  457
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lag.h  87
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h  924
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lib.c  4298
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lib.h  133
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_main.c  9194
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_nvm.c  1240
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_nvm.h  46
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_osdep.h  81
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c  38
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h  13
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_protocol_type.h  290
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp.c  2734
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp.h  297
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp_consts.h  374
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp_hw.c  3444
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp_hw.h  461
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_repr.c  467
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_repr.h  32
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sbq_cmd.h  92
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sched.c  4263
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sched.h  124
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sriov.c  1911
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sriov.h  161
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_switch.c  6709
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_switch.h  397
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_tc_lib.c  1823
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_tc_lib.h  190
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_trace.h  256
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx.c  2520
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx.h  445
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx_lib.c  369
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx_lib.h  83
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_type.h  1148
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vf_lib.c  1143
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vf_lib.h  294
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vf_lib_private.h  40
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vf_mbx.c  532
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vf_mbx.h  52
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c  225
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h  19
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_virtchnl.c  3826
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_virtchnl.h  82
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c  181
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h  13
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c  2029
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h  56
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vlan.h  18
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vlan_mode.c  438
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vlan_mode.h  13
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c  707
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h  32
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c  103
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h  29
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_xsk.c  1073
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_xsk.h  81
97 files changed, 95520 insertions, 0 deletions
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
new file mode 100644
index 000000000..9183d480b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018, Intel Corporation.
+
+#
+# Makefile for the Intel(R) Ethernet Connection E800 Series Linux Driver
+#
+
+obj-$(CONFIG_ICE) += ice.o
+
+ice-y := ice_main.o \
+ ice_controlq.o \
+ ice_common.o \
+ ice_nvm.o \
+ ice_switch.o \
+ ice_sched.o \
+ ice_base.o \
+ ice_lib.o \
+ ice_txrx_lib.o \
+ ice_txrx.o \
+ ice_fltr.o \
+ ice_pf_vsi_vlan_ops.o \
+ ice_vsi_vlan_ops.o \
+ ice_vsi_vlan_lib.o \
+ ice_fdir.o \
+ ice_ethtool_fdir.o \
+ ice_vlan_mode.o \
+ ice_flex_pipe.o \
+ ice_flow.o \
+ ice_idc.o \
+ ice_devlink.o \
+ ice_fw_update.o \
+ ice_lag.o \
+ ice_ethtool.o \
+ ice_repr.o \
+ ice_tc_lib.o
+ice-$(CONFIG_PCI_IOV) += \
+ ice_sriov.o \
+ ice_virtchnl.o \
+ ice_virtchnl_allowlist.o \
+ ice_virtchnl_fdir.o \
+ ice_vf_mbx.o \
+ ice_vf_vsi_vlan_ops.o \
+ ice_vf_lib.o
+ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o
+ice-$(CONFIG_TTY) += ice_gnss.o
+ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
+ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
+ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
+ice-$(CONFIG_ICE_SWITCHDEV) += ice_eswitch.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
new file mode 100644
index 000000000..f2be383d9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -0,0 +1,944 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_H_
+#define _ICE_H_
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/firmware.h>
+#include <linux/netdevice.h>
+#include <linux/compiler.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/cpumask.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/aer.h>
+#include <linux/interrupt.h>
+#include <linux/ethtool.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/bitmap.h>
+#include <linux/log2.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_sched.h>
+#include <linux/if_bridge.h>
+#include <linux/ctype.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/avf/virtchnl.h>
+#include <linux/cpu_rmap.h>
+#include <linux/dim.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/ip.h>
+#include <net/devlink.h>
+#include <net/ipv6.h>
+#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
+#include <net/geneve.h>
+#include <net/gre.h>
+#include <net/udp_tunnel.h>
+#include <net/vxlan.h>
+#include <net/gtp.h>
+#include <linux/ppp_defs.h>
+#include "ice_devids.h"
+#include "ice_type.h"
+#include "ice_txrx.h"
+#include "ice_dcb.h"
+#include "ice_switch.h"
+#include "ice_common.h"
+#include "ice_flow.h"
+#include "ice_sched.h"
+#include "ice_idc_int.h"
+#include "ice_sriov.h"
+#include "ice_vf_mbx.h"
+#include "ice_ptp.h"
+#include "ice_fdir.h"
+#include "ice_xsk.h"
+#include "ice_arfs.h"
+#include "ice_repr.h"
+#include "ice_eswitch.h"
+#include "ice_lag.h"
+#include "ice_vsi_vlan_ops.h"
+#include "ice_gnss.h"
+
+#define ICE_BAR0 0
+#define ICE_REQ_DESC_MULTIPLE 32
+#define ICE_MIN_NUM_DESC 64
+#define ICE_MAX_NUM_DESC 8160
+#define ICE_DFLT_MIN_RX_DESC 512
+#define ICE_DFLT_NUM_TX_DESC 256
+#define ICE_DFLT_NUM_RX_DESC 2048
+
+#define ICE_DFLT_TRAFFIC_CLASS BIT(0)
+#define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16)
+#define ICE_AQ_LEN 192
+#define ICE_MBXSQ_LEN 64
+#define ICE_SBQ_LEN 64
+#define ICE_MIN_LAN_TXRX_MSIX 1
+#define ICE_MIN_LAN_OICR_MSIX 1
+#define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX)
+#define ICE_FDIR_MSIX 2
+#define ICE_RDMA_NUM_AEQ_MSIX 4
+#define ICE_MIN_RDMA_MSIX 2
+#define ICE_ESWITCH_MSIX 1
+#define ICE_NO_VSI 0xffff
+#define ICE_VSI_MAP_CONTIG 0
+#define ICE_VSI_MAP_SCATTER 1
+#define ICE_MAX_SCATTER_TXQS 16
+#define ICE_MAX_SCATTER_RXQS 16
+#define ICE_Q_WAIT_RETRY_LIMIT 10
+#define ICE_Q_WAIT_MAX_RETRY (5 * ICE_Q_WAIT_RETRY_LIMIT)
+#define ICE_MAX_LG_RSS_QS 256
+#define ICE_RES_VALID_BIT 0x8000
+#define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1)
+#define ICE_RES_RDMA_VEC_ID (ICE_RES_MISC_VEC_ID - 1)
+/* All VF control VSIs share the same IRQ, so assign a unique ID for them */
+#define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_RDMA_VEC_ID - 1)
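+/* Illustrative arithmetic (editor's note): with ICE_RES_VALID_BIT 0x8000,
+ * the reserved IDs above resolve to 0x7FFF (misc), 0x7FFE (RDMA) and
+ * 0x7FFD (VF ctrl).
+ */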
+#define ICE_INVAL_Q_INDEX 0xffff
+
+#define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */
+
+#define ICE_CHNL_START_TC 1
+
+#define ICE_MAX_RESET_WAIT 20
+
+#define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4)
+
+#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
+
+#define ICE_UP_TABLE_TRANSLATE(val, i) \
+ (((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
+ ICE_AQ_VSI_UP_TABLE_UP##i##_M)
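+/* Example (illustrative, not part of the driver): placing user priority 3
+ * into the UP2 field of an ingress/egress table:
+ *   table |= ICE_UP_TABLE_TRANSLATE(3, 2);
+ * which expands to ((3 << ICE_AQ_VSI_UP_TABLE_UP2_S) &
+ * ICE_AQ_VSI_UP_TABLE_UP2_M), i.e. (3 << 6) & (0x7 << 6) = 0xC0.
+ */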
+
+#define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i]))
+#define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
+#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
+#define ICE_TX_FDIRDESC(R, i) (&(((struct ice_fltr_desc *)((R)->desc))[i]))
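+/* Example (illustrative): the accessors above just cast a ring's raw
+ * descriptor memory to the right descriptor type and index into it,
+ * e.g. fetching Tx descriptor i:
+ *   struct ice_tx_desc *txd = ICE_TX_DESC(tx_ring, i);
+ */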
+
+/* Minimum BW limit is 500 Kbps for any scheduler node */
+#define ICE_MIN_BW_LIMIT 500
+/* The user can specify BW in Kbit/Mbit/Gbit and the OS converts it into
+ * bytes. Use this divisor to convert the user-specified BW limit into Kbps.
+ */
+#define ICE_BW_KBPS_DIVISOR 125
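+/* Worked example (editor's sketch): a user limit of 200 Mbit/s arrives
+ * from the stack as 25,000,000 bytes/s; since 1 Kbit = 125 bytes,
+ *   div_u64(25000000, ICE_BW_KBPS_DIVISOR) == 200000 Kbps.
+ */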
+
+/* Macro for each VSI in a PF */
+#define ice_for_each_vsi(pf, i) \
+ for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)
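+/* Example usage (illustrative): walk all VSI slots and skip unallocated
+ * (NULL) entries, as ice_find_vsi() further below does:
+ *   ice_for_each_vsi(pf, i)
+ *           if (pf->vsi[i])
+ *                   ...
+ */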
+
+/* Macros for each Tx/Xdp/Rx ring in a VSI */
+#define ice_for_each_txq(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->num_txq; (i)++)
+
+#define ice_for_each_xdp_txq(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->num_xdp_txq; (i)++)
+
+#define ice_for_each_rxq(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->num_rxq; (i)++)
+
+/* Macros for each allocated Tx/Rx ring whether used or not in a VSI */
+#define ice_for_each_alloc_txq(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++)
+
+#define ice_for_each_alloc_rxq(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++)
+
+#define ice_for_each_q_vector(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++)
+
+#define ice_for_each_chnl_tc(i) \
+ for ((i) = ICE_CHNL_START_TC; (i) < ICE_CHNL_MAX_TC; (i)++)
+
+#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_UCAST_RX)
+
+#define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_TX | \
+ ICE_PROMISC_UCAST_RX | \
+ ICE_PROMISC_VLAN_TX | \
+ ICE_PROMISC_VLAN_RX)
+
+#define ICE_MCAST_PROMISC_BITS (ICE_PROMISC_MCAST_TX | ICE_PROMISC_MCAST_RX)
+
+#define ICE_MCAST_VLAN_PROMISC_BITS (ICE_PROMISC_MCAST_TX | \
+ ICE_PROMISC_MCAST_RX | \
+ ICE_PROMISC_VLAN_TX | \
+ ICE_PROMISC_VLAN_RX)
+
+#define ice_pf_to_dev(pf) (&((pf)->pdev->dev))
+
+enum ice_feature {
+ ICE_F_DSCP,
+ ICE_F_PTP_EXTTS,
+ ICE_F_SMA_CTRL,
+ ICE_F_GNSS,
+ ICE_F_MAX
+};
+
+DECLARE_STATIC_KEY_FALSE(ice_xdp_locking_key);
+
+struct ice_channel {
+ struct list_head list;
+ u8 type;
+ u16 sw_id;
+ u16 base_q;
+ u16 num_rxq;
+ u16 num_txq;
+ u16 vsi_num;
+ u8 ena_tc;
+ struct ice_aqc_vsi_props info;
+ u64 max_tx_rate;
+ u64 min_tx_rate;
+ atomic_t num_sb_fltr;
+ struct ice_vsi *ch_vsi;
+};
+
+struct ice_txq_meta {
+ u32 q_teid; /* Tx-scheduler element identifier */
+ u16 q_id; /* Entry in VSI's txq_map bitmap */
+ u16 q_handle; /* Relative index of Tx queue within TC */
+ u16 vsi_idx; /* VSI index that Tx queue belongs to */
+ u8 tc; /* TC number that Tx queue belongs to */
+};
+
+struct ice_tc_info {
+ u16 qoffset;
+ u16 qcount_tx;
+ u16 qcount_rx;
+ u8 netdev_tc;
+};
+
+struct ice_tc_cfg {
+ u8 numtc; /* Total number of enabled TCs */
+ u16 ena_tc; /* Tx map */
+ struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
+};
+
+struct ice_res_tracker {
+ u16 num_entries;
+ u16 end;
+ u16 list[];
+};
+
+struct ice_qs_cfg {
+ struct mutex *qs_mutex; /* will be assigned to &pf->avail_q_mutex */
+ unsigned long *pf_map;
+ unsigned long pf_map_size;
+ unsigned int q_count;
+ unsigned int scatter_count;
+ u16 *vsi_map;
+ u16 vsi_map_offset;
+ u8 mapping_mode;
+};
+
+struct ice_sw {
+ struct ice_pf *pf;
+ u16 sw_id; /* switch ID for this switch */
+ u16 bridge_mode; /* VEB/VEPA/Port Virtualizer */
+};
+
+enum ice_pf_state {
+ ICE_TESTING,
+ ICE_DOWN,
+ ICE_NEEDS_RESTART,
+ ICE_PREPARED_FOR_RESET, /* set by driver when prepared */
+ ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */
+ ICE_PFR_REQ, /* set by driver */
+ ICE_CORER_REQ, /* set by driver */
+ ICE_GLOBR_REQ, /* set by driver */
+ ICE_CORER_RECV, /* set by OICR handler */
+ ICE_GLOBR_RECV, /* set by OICR handler */
+ ICE_EMPR_RECV, /* set by OICR handler */
+ ICE_SUSPENDED, /* set on module remove path */
+ ICE_RESET_FAILED, /* set by reset/rebuild */
+ /* When checking whether the PF is in a nominal operating state, only
+ * the bits grouped before ICE_STATE_NOMINAL_CHECK_BITS are checked.
+ * If a bit must be considered for the nominal operating state, add it
+ * before ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's
+ * position without appropriate consideration.
+ */
+ ICE_STATE_NOMINAL_CHECK_BITS,
+ ICE_ADMINQ_EVENT_PENDING,
+ ICE_MAILBOXQ_EVENT_PENDING,
+ ICE_SIDEBANDQ_EVENT_PENDING,
+ ICE_MDD_EVENT_PENDING,
+ ICE_VFLR_EVENT_PENDING,
+ ICE_FLTR_OVERFLOW_PROMISC,
+ ICE_VF_DIS,
+ ICE_CFG_BUSY,
+ ICE_SERVICE_SCHED,
+ ICE_SERVICE_DIS,
+ ICE_FD_FLUSH_REQ,
+ ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */
+ ICE_MDD_VF_PRINT_PENDING, /* set while an MDD event is handled */
+ ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */
+ ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+ ICE_PHY_INIT_COMPLETE,
+ ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */
+ ICE_AUX_ERR_PENDING,
+ ICE_STATE_NBITS /* must be last */
+};
+
+enum ice_vsi_state {
+ ICE_VSI_DOWN,
+ ICE_VSI_NEEDS_RESTART,
+ ICE_VSI_NETDEV_ALLOCD,
+ ICE_VSI_NETDEV_REGISTERED,
+ ICE_VSI_UMAC_FLTR_CHANGED,
+ ICE_VSI_MMAC_FLTR_CHANGED,
+ ICE_VSI_PROMISC_CHANGED,
+ ICE_VSI_STATE_NBITS /* must be last */
+};
+
+/* struct that defines a VSI, associated with a dev */
+struct ice_vsi {
+ struct net_device *netdev;
+ struct ice_sw *vsw; /* switch this VSI is on */
+ struct ice_pf *back; /* back pointer to PF */
+ struct ice_port_info *port_info; /* back pointer to port_info */
+ struct ice_rx_ring **rx_rings; /* Rx ring array */
+ struct ice_tx_ring **tx_rings; /* Tx ring array */
+ struct ice_q_vector **q_vectors; /* q_vector array */
+
+ irqreturn_t (*irq_handler)(int irq, void *data);
+
+ u64 tx_linearize;
+ DECLARE_BITMAP(state, ICE_VSI_STATE_NBITS);
+ unsigned int current_netdev_flags;
+ u32 tx_restart;
+ u32 tx_busy;
+ u32 rx_buf_failed;
+ u32 rx_page_failed;
+ u16 num_q_vectors;
+ u16 base_vector; /* IRQ base for OS reserved vectors */
+ enum ice_vsi_type type;
+ u16 vsi_num; /* HW (absolute) index of this VSI */
+ u16 idx; /* software index in pf->vsi[] */
+
+ struct ice_vf *vf; /* VF associated with this VSI */
+
+ u16 ethtype; /* Ethernet protocol for pause frame */
+ u16 num_gfltr;
+ u16 num_bfltr;
+
+ /* RSS config */
+ u16 rss_table_size; /* HW RSS table size */
+ u16 rss_size; /* Allocated RSS queues */
+ u8 *rss_hkey_user; /* User configured hash keys */
+ u8 *rss_lut_user; /* User configured lookup table entries */
+ u8 rss_lut_type; /* used to configure Get/Set RSS LUT AQ call */
+
+ /* aRFS members only allocated for the PF VSI */
+#define ICE_MAX_ARFS_LIST 1024
+#define ICE_ARFS_LST_MASK (ICE_MAX_ARFS_LIST - 1)
+ struct hlist_head *arfs_fltr_list;
+ struct ice_arfs_active_fltr_cntrs *arfs_fltr_cntrs;
+ spinlock_t arfs_lock; /* protects aRFS hash table and filter state */
+ atomic_t *arfs_last_fltr_id;
+
+ u16 max_frame;
+ u16 rx_buf_len;
+
+ struct ice_aqc_vsi_props info; /* VSI properties */
+
+ /* VSI stats */
+ struct rtnl_link_stats64 net_stats;
+ struct ice_eth_stats eth_stats;
+ struct ice_eth_stats eth_stats_prev;
+
+ struct list_head tmp_sync_list; /* MAC filters to be synced */
+ struct list_head tmp_unsync_list; /* MAC filters to be unsynced */
+
+ u8 irqs_ready:1;
+ u8 current_isup:1; /* Sync 'link up' logging */
+ u8 stat_offsets_loaded:1;
+ struct ice_vsi_vlan_ops inner_vlan_ops;
+ struct ice_vsi_vlan_ops outer_vlan_ops;
+ u16 num_vlan;
+
+ /* queue information */
+ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+ u8 rx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+ u16 *txq_map; /* index in pf->avail_txqs */
+ u16 *rxq_map; /* index in pf->avail_rxqs */
+ u16 alloc_txq; /* Allocated Tx queues */
+ u16 num_txq; /* Used Tx queues */
+ u16 alloc_rxq; /* Allocated Rx queues */
+ u16 num_rxq; /* Used Rx queues */
+ u16 req_txq; /* User requested Tx queues */
+ u16 req_rxq; /* User requested Rx queues */
+ u16 num_rx_desc;
+ u16 num_tx_desc;
+ u16 qset_handle[ICE_MAX_TRAFFIC_CLASS];
+ struct ice_tc_cfg tc_cfg;
+ struct bpf_prog *xdp_prog;
+ struct ice_tx_ring **xdp_rings; /* XDP ring array */
+ unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */
+ u16 num_xdp_txq; /* Used XDP queues */
+ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+
+ struct net_device **target_netdevs;
+
+ struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */
+
+ /* Channel Specific Fields */
+ struct ice_vsi *tc_map_vsi[ICE_CHNL_MAX_TC];
+ u16 cnt_q_avail;
+ u16 next_base_q; /* next queue to be used for channel setup */
+ struct list_head ch_list;
+ u16 num_chnl_rxq;
+ u16 num_chnl_txq;
+ u16 ch_rss_size;
+ u16 num_chnl_fltr;
+ /* store away the RSS size info before configuring ADQ channels so
+ * that it can be used after tc-qdisc delete to restore the RSS
+ * settings as they were before
+ */
+ u16 orig_rss_size;
+ /* this keeps track of all enabled TCs with and without DCB,
+ * inclusive of ADQ; vsi->mqprio_qopt keeps track of queue
+ * information
+ */
+ u8 all_numtc;
+ u16 all_enatc;
+
+ /* store away TC info, to be used for rebuild logic */
+ u8 old_numtc;
+ u16 old_ena_tc;
+
+ struct ice_channel *ch;
+
+ /* back reference to the aggregator node this VSI corresponds to */
+ struct ice_agg_node *agg_node;
+} ____cacheline_internodealigned_in_smp;
+
+/* struct that defines an interrupt vector */
+struct ice_q_vector {
+ struct ice_vsi *vsi;
+
+ u16 v_idx; /* index in the vsi->q_vector array. */
+ u16 reg_idx;
+ u8 num_ring_rx; /* total number of Rx rings in vector */
+ u8 num_ring_tx; /* total number of Tx rings in vector */
+ u8 wb_on_itr:1; /* if true, WB on ITR is enabled */
+ /* in usecs, need to use ice_intrl_to_usecs_reg() before writing this
+ * value to the device
+ */
+ u8 intrl;
+
+ struct napi_struct napi;
+
+ struct ice_ring_container rx;
+ struct ice_ring_container tx;
+
+ cpumask_t affinity_mask;
+ struct irq_affinity_notify affinity_notify;
+
+ struct ice_channel *ch;
+
+ char name[ICE_INT_NAME_STR_LEN];
+
+ u16 total_events; /* net_dim(): number of interrupts processed */
+} ____cacheline_internodealigned_in_smp;
+
+enum ice_pf_flags {
+ ICE_FLAG_FLTR_SYNC,
+ ICE_FLAG_RDMA_ENA,
+ ICE_FLAG_RSS_ENA,
+ ICE_FLAG_SRIOV_ENA,
+ ICE_FLAG_SRIOV_CAPABLE,
+ ICE_FLAG_DCB_CAPABLE,
+ ICE_FLAG_DCB_ENA,
+ ICE_FLAG_FD_ENA,
+ ICE_FLAG_PTP_SUPPORTED, /* PTP is supported by NVM */
+ ICE_FLAG_PTP, /* PTP is enabled by software */
+ ICE_FLAG_ADV_FEATURES,
+ ICE_FLAG_TC_MQPRIO, /* support for Multi queue TC */
+ ICE_FLAG_CLS_FLOWER,
+ ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
+ ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
+ ICE_FLAG_NO_MEDIA,
+ ICE_FLAG_FW_LLDP_AGENT,
+ ICE_FLAG_MOD_POWER_UNSUPPORTED,
+ ICE_FLAG_PHY_FW_LOAD_FAILED,
+ ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */
+ ICE_FLAG_LEGACY_RX,
+ ICE_FLAG_VF_TRUE_PROMISC_ENA,
+ ICE_FLAG_MDD_AUTO_RESET_VF,
+ ICE_FLAG_VF_VLAN_PRUNING,
+ ICE_FLAG_LINK_LENIENT_MODE_ENA,
+ ICE_FLAG_PLUG_AUX_DEV,
+ ICE_FLAG_UNPLUG_AUX_DEV,
+ ICE_FLAG_MTU_CHANGED,
+ ICE_FLAG_GNSS, /* GNSS successfully initialized */
+ ICE_PF_FLAGS_NBITS /* must be last */
+};
+
+enum ice_misc_thread_tasks {
+ ICE_MISC_THREAD_EXTTS_EVENT,
+ ICE_MISC_THREAD_TX_TSTAMP,
+ ICE_MISC_THREAD_NBITS /* must be last */
+};
+
+struct ice_switchdev_info {
+ struct ice_vsi *control_vsi;
+ struct ice_vsi *uplink_vsi;
+ bool is_running;
+};
+
+struct ice_agg_node {
+ u32 agg_id;
+#define ICE_MAX_VSIS_IN_AGG_NODE 64
+ u32 num_vsis;
+ u8 valid;
+};
+
+struct ice_pf {
+ struct pci_dev *pdev;
+
+ struct devlink_region *nvm_region;
+ struct devlink_region *sram_region;
+ struct devlink_region *devcaps_region;
+
+ /* devlink port data */
+ struct devlink_port devlink_port;
+
+ /* OS reserved IRQ details */
+ struct msix_entry *msix_entries;
+ struct ice_res_tracker *irq_tracker;
+ /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
+ * number of MSIX vectors needed for all SR-IOV VFs from the number of
+ * MSIX vectors allowed on this PF.
+ */
+ u16 sriov_base_vector;
+
+ u16 ctrl_vsi_idx; /* control VSI index in pf->vsi array */
+
+ struct ice_vsi **vsi; /* VSIs created by the driver */
+ struct ice_sw *first_sw; /* first switch created by firmware */
+ u16 eswitch_mode; /* current mode of eswitch */
+ struct ice_vfs vfs;
+ DECLARE_BITMAP(features, ICE_F_MAX);
+ DECLARE_BITMAP(state, ICE_STATE_NBITS);
+ DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
+ DECLARE_BITMAP(misc_thread, ICE_MISC_THREAD_NBITS);
+ unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */
+ unsigned long *avail_rxqs; /* bitmap to track PF Rx queue usage */
+ unsigned long serv_tmr_period;
+ unsigned long serv_tmr_prev;
+ struct timer_list serv_tmr;
+ struct work_struct serv_task;
+ struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */
+ struct mutex sw_mutex; /* lock for protecting VSI alloc flow */
+ struct mutex tc_mutex; /* lock to protect TC changes */
+ struct mutex adev_mutex; /* lock to protect aux device access */
+ u32 msg_enable;
+ struct ice_ptp ptp;
+ struct tty_driver *ice_gnss_tty_driver;
+ struct tty_port *gnss_tty_port[ICE_GNSS_TTY_MINOR_DEVICES];
+ struct gnss_serial *gnss_serial[ICE_GNSS_TTY_MINOR_DEVICES];
+ u16 num_rdma_msix; /* Total MSIX vectors for RDMA driver */
+ u16 rdma_base_vector;
+
+ /* spinlock to protect the AdminQ wait list */
+ spinlock_t aq_wait_lock;
+ struct hlist_head aq_wait_list;
+ wait_queue_head_t aq_wait_queue;
+ bool fw_emp_reset_disabled;
+
+ wait_queue_head_t reset_wait_queue;
+
+ u32 hw_csum_rx_error;
+ u32 oicr_err_reg;
+ u16 oicr_idx; /* Other interrupt cause MSIX vector index */
+ u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */
+ u16 max_pf_txqs; /* Total Tx queues PF wide */
+ u16 max_pf_rxqs; /* Total Rx queues PF wide */
+ u16 num_lan_msix; /* Total MSIX vectors for base driver */
+ u16 num_lan_tx; /* num LAN Tx queues setup */
+ u16 num_lan_rx; /* num LAN Rx queues setup */
+ u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */
+ u16 num_alloc_vsi;
+ u16 corer_count; /* Core reset count */
+ u16 globr_count; /* Global reset count */
+ u16 empr_count; /* EMP reset count */
+ u16 pfr_count; /* PF reset count */
+
+ u8 wol_ena : 1; /* software state of WoL */
+ u32 wakeup_reason; /* last wakeup reason */
+ struct ice_hw_port_stats stats;
+ struct ice_hw_port_stats stats_prev;
+ struct ice_hw hw;
+ u8 stat_prev_loaded:1; /* has previous stats been loaded */
+ u8 rdma_mode;
+ u16 dcbx_cap;
+ u32 tx_timeout_count;
+ unsigned long tx_timeout_last_recovery;
+ u32 tx_timeout_recovery_level;
+ char int_name[ICE_INT_NAME_STR_LEN];
+ struct auxiliary_device *adev;
+ int aux_idx;
+ u32 sw_int_count;
+ /* count of tc_flower filters specific to channel (aka where filter
+ * action is "hw_tc <tc_num>")
+ */
+ u16 num_dmac_chnl_fltrs;
+ struct hlist_head tc_flower_fltr_list;
+
+ __le64 nvm_phy_type_lo; /* NVM PHY type low */
+ __le64 nvm_phy_type_hi; /* NVM PHY type high */
+ struct ice_link_default_override_tlv link_dflt_override;
+ struct ice_lag *lag; /* Link Aggregation information */
+
+ struct ice_switchdev_info switchdev;
+
+#define ICE_INVALID_AGG_NODE_ID 0
+#define ICE_PF_AGG_NODE_ID_START 1
+#define ICE_MAX_PF_AGG_NODES 32
+ struct ice_agg_node pf_agg_node[ICE_MAX_PF_AGG_NODES];
+#define ICE_VF_AGG_NODE_ID_START 65
+#define ICE_MAX_VF_AGG_NODES 32
+ struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES];
+};
+
+struct ice_netdev_priv {
+ struct ice_vsi *vsi;
+ struct ice_repr *repr;
+ /* indirect block callbacks on registered higher level devices
+ * (e.g. tunnel devices)
+ *
+ * tc_indr_block_cb_priv_list is used to look up indirect callback
+ * private data
+ */
+ struct list_head tc_indr_block_priv_list;
+};
+
+/**
+ * ice_vector_ch_enabled - check if a q_vector is channel enabled
+ * @qv: pointer to q_vector (must not be NULL; it is dereferenced below)
+ *
+ * Returns true if the vector is channel enabled, otherwise false.
+ */
+static inline bool ice_vector_ch_enabled(struct ice_q_vector *qv)
+{
+ return !!qv->ch; /* Enable it to run with TC */
+}
+
+/**
+ * ice_irq_dynamic_ena - Enable default interrupt generation settings
+ * @hw: pointer to HW struct
+ * @vsi: pointer to VSI struct, can be NULL
+ * @q_vector: pointer to q_vector, can be NULL
+ */
+static inline void
+ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
+ struct ice_q_vector *q_vector)
+{
+ u32 vector = (vsi && q_vector) ? q_vector->reg_idx :
+ ((struct ice_pf *)hw->back)->oicr_idx;
+ int itr = ICE_ITR_NONE;
+ u32 val;
+
+ /* clear the PBA here, as this function is meant to clean out all
+ * previous interrupts and enable the interrupt
+ */
+ val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
+ (itr << GLINT_DYN_CTL_ITR_INDX_S);
+ if (vsi && test_bit(ICE_VSI_DOWN, vsi->state))
+ return;
+ wr32(hw, GLINT_DYN_CTL(vector), val);
+}
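+/* Illustrative call (editor's sketch, not from this file): re-enable a
+ * queue vector's interrupt, e.g. after servicing it:
+ *   ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector);
+ */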
+
+/**
+ * ice_netdev_to_pf - Retrieve the PF struct associated with a netdev
+ * @netdev: pointer to the netdev struct
+ */
+static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ return np->vsi->back;
+}
+
+static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
+{
+ return !!READ_ONCE(vsi->xdp_prog);
+}
+
+static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
+{
+ ring->flags |= ICE_TX_FLAGS_RING_XDP;
+}
+
+/**
+ * ice_xsk_pool - get XSK buffer pool bound to a ring
+ * @ring: Rx ring to use
+ *
+ * Returns a pointer to xsk_buff_pool structure if there is a buffer pool
+ * present, NULL otherwise.
+ */
+static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
+{
+ struct ice_vsi *vsi = ring->vsi;
+ u16 qid = ring->q_index;
+
+ if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
+ return NULL;
+
+ return xsk_get_pool_from_qid(vsi->netdev, qid);
+}
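+/* Illustrative usage (assumes the Rx ring's xsk_pool field from
+ * ice_txrx.h): bind or clear the zero-copy pool before processing;
+ * a NULL result means the queue uses regular buffers:
+ *   ring->xsk_pool = ice_xsk_pool(ring);
+ */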
+
+/**
+ * ice_tx_xsk_pool - assign XSK buff pool to XDP ring
+ * @vsi: pointer to VSI
+ * @qid: index of a queue to look at XSK buff pool presence
+ *
+ * Sets XSK buff pool pointer on XDP ring.
+ *
+ * The XDP ring is picked from the Rx ring selected by the provided queue
+ * ID. This is done because a queue vector may have more than one XDP ring
+ * assigned to it, e.g. when the user reduced the queue count on the netdev.
+ * The Rx ring carries a pointer to one of these XDP rings for its own
+ * purposes, such as handling the XDP_TX action, so we can piggyback on the
+ * rx_ring->xdp_ring assignment done during XDP ring initialization.
+ */
+static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
+{
+ struct ice_tx_ring *ring;
+
+ ring = vsi->rx_rings[qid]->xdp_ring;
+ if (!ring)
+ return;
+
+ if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) {
+ ring->xsk_pool = NULL;
+ return;
+ }
+
+ ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid);
+}
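+/* Illustrative usage (editor's sketch): refresh the pool binding for each
+ * queue ID after a queue-count change:
+ *   for (qid = 0; qid < vsi->num_rxq; qid++)
+ *           ice_tx_xsk_pool(vsi, qid);
+ */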
+
+/**
+ * ice_get_main_vsi - Get the PF VSI
+ * @pf: PF instance
+ *
+ * returns pf->vsi[0], which by definition is the PF VSI
+ */
+static inline struct ice_vsi *ice_get_main_vsi(struct ice_pf *pf)
+{
+ if (pf->vsi)
+ return pf->vsi[0];
+
+ return NULL;
+}
+
+/**
+ * ice_get_netdev_priv_vsi - return VSI associated with netdev priv.
+ * @np: private netdev structure
+ */
+static inline struct ice_vsi *ice_get_netdev_priv_vsi(struct ice_netdev_priv *np)
+{
+ /* In case of port representor return source port VSI. */
+ if (np->repr)
+ return np->repr->src_vsi;
+ else
+ return np->vsi;
+}
+
+/**
+ * ice_get_ctrl_vsi - Get the control VSI
+ * @pf: PF instance
+ */
+static inline struct ice_vsi *ice_get_ctrl_vsi(struct ice_pf *pf)
+{
+ /* if pf->ctrl_vsi_idx is ICE_NO_VSI, control VSI was not set up */
+ if (!pf->vsi || pf->ctrl_vsi_idx == ICE_NO_VSI)
+ return NULL;
+
+ return pf->vsi[pf->ctrl_vsi_idx];
+}
+
+/**
+ * ice_find_vsi - Find the VSI from VSI ID
+ * @pf: The PF pointer to search in
+ * @vsi_num: The VSI ID to search for
+ */
+static inline struct ice_vsi *ice_find_vsi(struct ice_pf *pf, u16 vsi_num)
+{
+ int i;
+
+ ice_for_each_vsi(pf, i)
+ if (pf->vsi[i] && pf->vsi[i]->vsi_num == vsi_num)
+ return pf->vsi[i];
+ return NULL;
+}
+
+/**
+ * ice_is_switchdev_running - check if switchdev is configured
+ * @pf: pointer to PF structure
+ *
+ * Returns true if eswitch mode is set to DEVLINK_ESWITCH_MODE_SWITCHDEV
+ * and switchdev is configured, false otherwise.
+ */
+static inline bool ice_is_switchdev_running(struct ice_pf *pf)
+{
+ return pf->switchdev.is_running;
+}
+
+/**
+ * ice_set_sriov_cap - enable SRIOV in PF flags
+ * @pf: PF struct
+ */
+static inline void ice_set_sriov_cap(struct ice_pf *pf)
+{
+ if (pf->hw.func_caps.common_cap.sr_iov_1_1)
+ set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+}
+
+/**
+ * ice_clear_sriov_cap - disable SRIOV in PF flags
+ * @pf: PF struct
+ */
+static inline void ice_clear_sriov_cap(struct ice_pf *pf)
+{
+ clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+}
+
+#define ICE_FD_STAT_CTR_BLOCK_COUNT 256
+#define ICE_FD_STAT_PF_IDX(base_idx) \
+ ((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT)
+#define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx)
+#define ICE_FD_STAT_CH 1
+#define ICE_FD_CH_STAT_IDX(base_idx) \
+ (ICE_FD_STAT_PF_IDX(base_idx) + ICE_FD_STAT_CH)
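+/* Worked example (editor's note): each PF owns a block of
+ * ICE_FD_STAT_CTR_BLOCK_COUNT (256) counters, so for base_idx 2 the
+ * sideband stat index is 2 * 256 = 512 and the channel stat index is
+ * 512 + ICE_FD_STAT_CH = 513.
+ */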
+
+/**
+ * ice_is_adq_active - check if any ADQs are configured
+ * @pf: pointer to PF
+ *
+ * Returns true if any ADQs are configured, which is determined by looking
+ * at the VSI type (which should be VSI_PF), numtc, and the TC_MQPRIO flag;
+ * otherwise returns false.
+ */
+static inline bool ice_is_adq_active(struct ice_pf *pf)
+{
+ struct ice_vsi *vsi;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return false;
+
+ /* is ADQ configured */
+ if (vsi->tc_cfg.numtc > ICE_CHNL_START_TC &&
+ test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ return true;
+
+ return false;
+}
+
+bool netif_is_ice(struct net_device *dev);
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
+int ice_vsi_open_ctrl(struct ice_vsi *vsi);
+int ice_vsi_open(struct ice_vsi *vsi);
+void ice_set_ethtool_ops(struct net_device *netdev);
+void ice_set_ethtool_repr_ops(struct net_device *netdev);
+void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
+u16 ice_get_avail_txq_count(struct ice_pf *pf);
+u16 ice_get_avail_rxq_count(struct ice_pf *pf);
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked);
+void ice_update_vsi_stats(struct ice_vsi *vsi);
+void ice_update_pf_stats(struct ice_pf *pf);
+void
+ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp,
+ struct ice_q_stats stats, u64 *pkts, u64 *bytes);
+int ice_up(struct ice_vsi *vsi);
+int ice_down(struct ice_vsi *vsi);
+int ice_down_up(struct ice_vsi *vsi);
+int ice_vsi_cfg(struct ice_vsi *vsi);
+struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+int ice_vsi_determine_xdp_res(struct ice_vsi *vsi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
+int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
+int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed);
+int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed);
+void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
+int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
+void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
+int ice_plug_aux_dev(struct ice_pf *pf);
+void ice_unplug_aux_dev(struct ice_pf *pf);
+int ice_init_rdma(struct ice_pf *pf);
+const char *ice_aq_str(enum ice_aq_err aq_err);
+bool ice_is_wol_supported(struct ice_hw *hw);
+void ice_fdir_del_all_fltrs(struct ice_vsi *vsi);
+int
+ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
+ bool is_tun);
+void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena);
+int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
+int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
+int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd);
+int
+ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
+ u32 *rule_locs);
+void ice_fdir_rem_adq_chnl(struct ice_hw *hw, u16 vsi_idx);
+void ice_fdir_release_flows(struct ice_hw *hw);
+void ice_fdir_replay_flows(struct ice_hw *hw);
+void ice_fdir_replay_fltrs(struct ice_pf *pf);
+int ice_fdir_create_dflt_rules(struct ice_pf *pf);
+int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
+ struct ice_rq_event_info *event);
+int ice_open(struct net_device *netdev);
+int ice_open_internal(struct net_device *netdev);
+int ice_stop(struct net_device *netdev);
+void ice_service_task_schedule(struct ice_pf *pf);
+
+/**
+ * ice_set_rdma_cap - enable RDMA support
+ * @pf: PF struct
+ */
+static inline void ice_set_rdma_cap(struct ice_pf *pf)
+{
+ if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) {
+ set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+ set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
+ }
+}
+
+/**
+ * ice_clear_rdma_cap - disable RDMA support
+ * @pf: PF struct
+ */
+static inline void ice_clear_rdma_cap(struct ice_pf *pf)
+{
+ /* defer unplug to service task to avoid RTNL lock and
+ * clear PLUG bit so that pending plugs don't interfere
+ */
+ clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
+ set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
+ clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+}
+#endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
new file mode 100644
index 000000000..1bdc70aa9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -0,0 +1,2363 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_ADMINQ_CMD_H_
+#define _ICE_ADMINQ_CMD_H_
+
+/* This header file defines the Admin Queue commands, error codes and
+ * descriptor format. It is shared between Firmware and Software.
+ */
+
+#define ICE_MAX_VSI 768
+#define ICE_AQC_TOPO_MAX_LEVEL_NUM 0x9
+#define ICE_AQ_SET_MAC_FRAME_SIZE_MAX 9728
+
+struct ice_aqc_generic {
+ __le32 param0;
+ __le32 param1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Get version (direct 0x0001) */
+struct ice_aqc_get_ver {
+ __le32 rom_ver;
+ __le32 fw_build;
+ u8 fw_branch;
+ u8 fw_major;
+ u8 fw_minor;
+ u8 fw_patch;
+ u8 api_branch;
+ u8 api_major;
+ u8 api_minor;
+ u8 api_patch;
+};
+
+/* Send driver version (indirect 0x0002) */
+struct ice_aqc_driver_ver {
+ u8 major_ver;
+ u8 minor_ver;
+ u8 build_ver;
+ u8 subbuild_ver;
+ u8 reserved[4];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Queue Shutdown (direct 0x0003) */
+struct ice_aqc_q_shutdown {
+ u8 driver_unloading;
+#define ICE_AQC_DRIVER_UNLOADING BIT(0)
+ u8 reserved[15];
+};
+
+/* Request resource ownership (direct 0x0008)
+ * Release resource ownership (direct 0x0009)
+ */
+struct ice_aqc_req_res {
+ __le16 res_id;
+#define ICE_AQC_RES_ID_NVM 1
+#define ICE_AQC_RES_ID_SDP 2
+#define ICE_AQC_RES_ID_CHNG_LOCK 3
+#define ICE_AQC_RES_ID_GLBL_LOCK 4
+ __le16 access_type;
+#define ICE_AQC_RES_ACCESS_READ 1
+#define ICE_AQC_RES_ACCESS_WRITE 2
+
+ /* Upon successful completion, FW writes this value and the driver is
+ * expected to release the resource before the timeout. This value is
+ * provided in milliseconds.
+ */
+ __le32 timeout;
+#define ICE_AQ_RES_NVM_READ_DFLT_TIMEOUT_MS 3000
+#define ICE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS 180000
+#define ICE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS 1000
+#define ICE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS 3000
+ /* For SDP: pin ID of the SDP */
+ __le32 res_number;
+ /* Status is only used for ICE_AQC_RES_ID_GLBL_LOCK */
+ __le16 status;
+#define ICE_AQ_RES_GLBL_SUCCESS 0
+#define ICE_AQ_RES_GLBL_IN_PROG 1
+#define ICE_AQ_RES_GLBL_DONE 2
+ u8 reserved[2];
+};
+
+/* Get function capabilities (indirect 0x000A)
+ * Get device capabilities (indirect 0x000B)
+ */
+struct ice_aqc_list_caps {
+ u8 cmd_flags;
+ u8 pf_index;
+ u8 reserved[2];
+ __le32 count;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Device/Function buffer entry, repeated per reported capability */
+struct ice_aqc_list_caps_elem {
+ __le16 cap;
+#define ICE_AQC_CAPS_VALID_FUNCTIONS 0x0005
+#define ICE_AQC_CAPS_SRIOV 0x0012
+#define ICE_AQC_CAPS_VF 0x0013
+#define ICE_AQC_CAPS_VSI 0x0017
+#define ICE_AQC_CAPS_DCB 0x0018
+#define ICE_AQC_CAPS_RSS 0x0040
+#define ICE_AQC_CAPS_RXQS 0x0041
+#define ICE_AQC_CAPS_TXQS 0x0042
+#define ICE_AQC_CAPS_MSIX 0x0043
+#define ICE_AQC_CAPS_FD 0x0045
+#define ICE_AQC_CAPS_1588 0x0046
+#define ICE_AQC_CAPS_MAX_MTU 0x0047
+#define ICE_AQC_CAPS_NVM_VER 0x0048
+#define ICE_AQC_CAPS_PENDING_NVM_VER 0x0049
+#define ICE_AQC_CAPS_OROM_VER 0x004A
+#define ICE_AQC_CAPS_PENDING_OROM_VER 0x004B
+#define ICE_AQC_CAPS_NET_VER 0x004C
+#define ICE_AQC_CAPS_PENDING_NET_VER 0x004D
+#define ICE_AQC_CAPS_RDMA 0x0051
+#define ICE_AQC_CAPS_PCIE_RESET_AVOIDANCE 0x0076
+#define ICE_AQC_CAPS_POST_UPDATE_RESET_RESTRICT 0x0077
+#define ICE_AQC_CAPS_NVM_MGMT 0x0080
+
+ u8 major_ver;
+ u8 minor_ver;
+ /* Number of resources described by this capability */
+ __le32 number;
+ /* Only meaningful for some types of resources */
+ __le32 logical_id;
+ /* Only meaningful for some types of resources */
+ __le32 phys_id;
+ __le64 rsvd1;
+ __le64 rsvd2;
+};
+
+/* Manage MAC address, read command - indirect (0x0107)
+ * This struct is also used for the response
+ */
+struct ice_aqc_manage_mac_read {
+ __le16 flags; /* Zeroed by device driver */
+#define ICE_AQC_MAN_MAC_LAN_ADDR_VALID BIT(4)
+#define ICE_AQC_MAN_MAC_SAN_ADDR_VALID BIT(5)
+#define ICE_AQC_MAN_MAC_PORT_ADDR_VALID BIT(6)
+#define ICE_AQC_MAN_MAC_WOL_ADDR_VALID BIT(7)
+#define ICE_AQC_MAN_MAC_READ_S 4
+#define ICE_AQC_MAN_MAC_READ_M (0xF << ICE_AQC_MAN_MAC_READ_S)
+ u8 rsvd[2];
+ u8 num_addr; /* Used in response */
+ u8 rsvd1[3];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Response buffer format for manage MAC read command */
+struct ice_aqc_manage_mac_read_resp {
+ u8 lport_num;
+ u8 addr_type;
+#define ICE_AQC_MAN_MAC_ADDR_TYPE_LAN 0
+#define ICE_AQC_MAN_MAC_ADDR_TYPE_WOL 1
+ u8 mac_addr[ETH_ALEN];
+};
+
+/* Manage MAC address, write command - direct (0x0108) */
+struct ice_aqc_manage_mac_write {
+ u8 rsvd;
+ u8 flags;
+#define ICE_AQC_MAN_MAC_WR_MC_MAG_EN BIT(0)
+#define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP BIT(1)
+#define ICE_AQC_MAN_MAC_WR_S 6
+#define ICE_AQC_MAN_MAC_WR_M ICE_M(3, ICE_AQC_MAN_MAC_WR_S)
+#define ICE_AQC_MAN_MAC_UPDATE_LAA 0
+#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL BIT(ICE_AQC_MAN_MAC_WR_S)
+ /* byte stream in network order */
+ u8 mac_addr[ETH_ALEN];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Clear PXE Command and response (direct 0x0110) */
+struct ice_aqc_clear_pxe {
+ u8 rx_cnt;
+#define ICE_AQC_CLEAR_PXE_RX_CNT 0x2
+ u8 reserved[15];
+};
+
+/* Get switch configuration (0x0200) */
+struct ice_aqc_get_sw_cfg {
+ /* Reserved for command and copy of request flags for response */
+ __le16 flags;
+ /* First desc in case of command and next_elem in case of response.
+ * In a response, a non-zero value means that not all of the
+ * configuration was returned and a new command shall be sent with
+ * this value in the 'first desc' field
+ */
+ __le16 element;
+ /* Reserved for command, only used for response */
+ __le16 num_elems;
+ __le16 rsvd;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Each entry in the response buffer is of the following type: */
+struct ice_aqc_get_sw_cfg_resp_elem {
+ /* VSI/Port Number */
+ __le16 vsi_port_num;
+#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S 0
+#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M \
+ (0x3FF << ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S)
+#define ICE_AQC_GET_SW_CONF_RESP_TYPE_S 14
+#define ICE_AQC_GET_SW_CONF_RESP_TYPE_M (0x3 << ICE_AQC_GET_SW_CONF_RESP_TYPE_S)
+#define ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT 0
+#define ICE_AQC_GET_SW_CONF_RESP_VIRT_PORT 1
+#define ICE_AQC_GET_SW_CONF_RESP_VSI 2
+
+ /* SWID VSI/Port belongs to */
+ __le16 swid;
+
+ /* Bit 14..0 : PF/VF number VSI belongs to
+ * Bit 15 : VF indication bit
+ */
+ __le16 pf_vf_num;
+#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S 0
+#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M \
+ (0x7FFF << ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S)
+#define ICE_AQC_GET_SW_CONF_RESP_IS_VF BIT(15)
+};
+
+/* Set Port parameters, (direct, 0x0203) */
+struct ice_aqc_set_port_params {
+ __le16 cmd_flags;
+#define ICE_AQC_SET_P_PARAMS_DOUBLE_VLAN_ENA BIT(2)
+ __le16 bad_frame_vsi;
+ __le16 swid;
+ u8 reserved[10];
+};
+
+/* These resource type defines are used for all switch resource
+ * commands where a resource type is required, such as:
+ * Get Resource Allocation command (indirect 0x0204)
+ * Allocate Resources command (indirect 0x0208)
+ * Free Resources command (indirect 0x0209)
+ * Get Allocated Resource Descriptors Command (indirect 0x020A)
+ */
+#define ICE_AQC_RES_TYPE_VSI_LIST_REP 0x03
+#define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE 0x04
+#define ICE_AQC_RES_TYPE_RECIPE 0x05
+#define ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK 0x21
+#define ICE_AQC_RES_TYPE_FDIR_GUARANTEED_ENTRIES 0x22
+#define ICE_AQC_RES_TYPE_FDIR_SHARED_ENTRIES 0x23
+#define ICE_AQC_RES_TYPE_FD_PROF_BLDR_PROFID 0x58
+#define ICE_AQC_RES_TYPE_FD_PROF_BLDR_TCAM 0x59
+#define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID 0x60
+#define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM 0x61
+
+#define ICE_AQC_RES_TYPE_FLAG_SHARED BIT(7)
+#define ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM BIT(12)
+#define ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX BIT(13)
+
+#define ICE_AQC_RES_TYPE_FLAG_DEDICATED 0x00
+
+#define ICE_AQC_RES_TYPE_S 0
+#define ICE_AQC_RES_TYPE_M (0x07F << ICE_AQC_RES_TYPE_S)
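+/* Example (illustrative): an Allocate Resources request for a shared VSI
+ * list composes its res_type field from the defines above:
+ *   res_type = ICE_AQC_RES_TYPE_VSI_LIST_REP | ICE_AQC_RES_TYPE_FLAG_SHARED;
+ */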
+
+/* Allocate Resources command (indirect 0x0208)
+ * Free Resources command (indirect 0x0209)
+ */
+struct ice_aqc_alloc_free_res_cmd {
+ __le16 num_entries; /* Number of Resource entries */
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Resource descriptor */
+struct ice_aqc_res_elem {
+ union {
+ __le16 sw_resp;
+ __le16 flu_resp;
+ } e;
+};
+
+/* Buffer for Allocate/Free Resources commands */
+struct ice_aqc_alloc_free_res_elem {
+ __le16 res_type; /* Types defined above cmd 0x0204 */
+#define ICE_AQC_RES_TYPE_SHARED_S 7
+#define ICE_AQC_RES_TYPE_SHARED_M (0x1 << ICE_AQC_RES_TYPE_SHARED_S)
+#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S 8
+#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_M \
+ (0xF << ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S)
+ __le16 num_elems;
+ struct ice_aqc_res_elem elem[];
+};
+
+/* Request buffer for Set VLAN Mode AQ command (indirect 0x020C) */
+struct ice_aqc_set_vlan_mode {
+ u8 reserved;
+ u8 l2tag_prio_tagging;
+#define ICE_AQ_VLAN_PRIO_TAG_S 0
+#define ICE_AQ_VLAN_PRIO_TAG_M (0x7 << ICE_AQ_VLAN_PRIO_TAG_S)
+#define ICE_AQ_VLAN_PRIO_TAG_NOT_SUPPORTED 0x0
+#define ICE_AQ_VLAN_PRIO_TAG_STAG 0x1
+#define ICE_AQ_VLAN_PRIO_TAG_OUTER_CTAG 0x2
+#define ICE_AQ_VLAN_PRIO_TAG_OUTER_VLAN 0x3
+#define ICE_AQ_VLAN_PRIO_TAG_INNER_CTAG 0x4
+#define ICE_AQ_VLAN_PRIO_TAG_MAX 0x4
+#define ICE_AQ_VLAN_PRIO_TAG_ERROR 0x7
+ u8 l2tag_reserved[64];
+ u8 rdma_packet;
+#define ICE_AQ_VLAN_RDMA_TAG_S 0
+#define ICE_AQ_VLAN_RDMA_TAG_M (0x3F << ICE_AQ_VLAN_RDMA_TAG_S)
+#define ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING 0x10
+#define ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING 0x1A
+ u8 rdma_reserved[2];
+ u8 mng_vlan_prot_id;
+#define ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER 0x10
+#define ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER 0x11
+ u8 prot_id_reserved[30];
+};
+
+/* Response buffer for Get VLAN Mode AQ command (indirect 0x020D) */
+struct ice_aqc_get_vlan_mode {
+ u8 vlan_mode;
+#define ICE_AQ_VLAN_MODE_DVM_ENA BIT(0)
+ u8 l2tag_prio_tagging;
+ u8 reserved[98];
+};
+
+/* Add VSI (indirect 0x0210)
+ * Update VSI (indirect 0x0211)
+ * Get VSI (indirect 0x0212)
+ * Free VSI (indirect 0x0213)
+ */
+struct ice_aqc_add_get_update_free_vsi {
+ __le16 vsi_num;
+#define ICE_AQ_VSI_NUM_S 0
+#define ICE_AQ_VSI_NUM_M (0x03FF << ICE_AQ_VSI_NUM_S)
+#define ICE_AQ_VSI_IS_VALID BIT(15)
+ __le16 cmd_flags;
+#define ICE_AQ_VSI_KEEP_ALLOC 0x1
+ u8 vf_id;
+ u8 reserved;
+ __le16 vsi_flags;
+#define ICE_AQ_VSI_TYPE_S 0
+#define ICE_AQ_VSI_TYPE_M (0x3 << ICE_AQ_VSI_TYPE_S)
+#define ICE_AQ_VSI_TYPE_VF 0x0
+#define ICE_AQ_VSI_TYPE_VMDQ2 0x1
+#define ICE_AQ_VSI_TYPE_PF 0x2
+#define ICE_AQ_VSI_TYPE_EMP_MNG 0x3
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Response descriptor for:
+ * Add VSI (indirect 0x0210)
+ * Update VSI (indirect 0x0211)
+ * Free VSI (indirect 0x0213)
+ */
+struct ice_aqc_add_update_free_vsi_resp {
+ __le16 vsi_num;
+ __le16 ext_status;
+ __le16 vsi_used;
+ __le16 vsi_free;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_vsi_props {
+ __le16 valid_sections;
+#define ICE_AQ_VSI_PROP_SW_VALID BIT(0)
+#define ICE_AQ_VSI_PROP_SECURITY_VALID BIT(1)
+#define ICE_AQ_VSI_PROP_VLAN_VALID BIT(2)
+#define ICE_AQ_VSI_PROP_OUTER_TAG_VALID BIT(3)
+#define ICE_AQ_VSI_PROP_INGRESS_UP_VALID BIT(4)
+#define ICE_AQ_VSI_PROP_EGRESS_UP_VALID BIT(5)
+#define ICE_AQ_VSI_PROP_RXQ_MAP_VALID BIT(6)
+#define ICE_AQ_VSI_PROP_Q_OPT_VALID BIT(7)
+#define ICE_AQ_VSI_PROP_OUTER_UP_VALID BIT(8)
+#define ICE_AQ_VSI_PROP_FLOW_DIR_VALID BIT(11)
+#define ICE_AQ_VSI_PROP_PASID_VALID BIT(12)
+ /* switch section */
+ u8 sw_id;
+ u8 sw_flags;
+#define ICE_AQ_VSI_SW_FLAG_ALLOW_LB BIT(5)
+#define ICE_AQ_VSI_SW_FLAG_LOCAL_LB BIT(6)
+#define ICE_AQ_VSI_SW_FLAG_SRC_PRUNE BIT(7)
+ u8 sw_flags2;
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S 0
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M (0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S)
+#define ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA BIT(0)
+#define ICE_AQ_VSI_SW_FLAG_LAN_ENA BIT(4)
+ u8 veb_stat_id;
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_S 0
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_M (0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S)
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_VALID BIT(5)
+ /* security section */
+ u8 sec_flags;
+#define ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD BIT(0)
+#define ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF BIT(2)
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S 4
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M (0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)
+#define ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA BIT(0)
+ u8 sec_reserved;
+ /* VLAN section */
+ __le16 port_based_inner_vlan; /* VLANS include priority bits */
+ u8 inner_vlan_reserved[2];
+ u8 inner_vlan_flags;
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_S 0
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_M (0x3 << ICE_AQ_VSI_INNER_VLAN_TX_MODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTUNTAGGED 0x1
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTTAGGED 0x2
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL 0x3
+#define ICE_AQ_VSI_INNER_VLAN_INSERT_PVID BIT(2)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_S 3
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_M (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH (0x0 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_UP (0x1 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR (0x2 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+ u8 inner_vlan_reserved2[3];
+ /* ingress egress up sections */
+ __le32 ingress_table; /* bitmap, 3 bits per up */
+#define ICE_AQ_VSI_UP_TABLE_UP0_S 0
+#define ICE_AQ_VSI_UP_TABLE_UP0_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S)
+#define ICE_AQ_VSI_UP_TABLE_UP1_S 3
+#define ICE_AQ_VSI_UP_TABLE_UP1_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S)
+#define ICE_AQ_VSI_UP_TABLE_UP2_S 6
+#define ICE_AQ_VSI_UP_TABLE_UP2_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S)
+#define ICE_AQ_VSI_UP_TABLE_UP3_S 9
+#define ICE_AQ_VSI_UP_TABLE_UP3_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S)
+#define ICE_AQ_VSI_UP_TABLE_UP4_S 12
+#define ICE_AQ_VSI_UP_TABLE_UP4_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S)
+#define ICE_AQ_VSI_UP_TABLE_UP5_S 15
+#define ICE_AQ_VSI_UP_TABLE_UP5_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S)
+#define ICE_AQ_VSI_UP_TABLE_UP6_S 18
+#define ICE_AQ_VSI_UP_TABLE_UP6_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S)
+#define ICE_AQ_VSI_UP_TABLE_UP7_S 21
+#define ICE_AQ_VSI_UP_TABLE_UP7_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S)
+ __le32 egress_table; /* same defines as for ingress table */
+ /* outer tags section */
+ __le16 port_based_outer_vlan;
+ u8 outer_vlan_flags;
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_S 0
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_M (0x3 << ICE_AQ_VSI_OUTER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_BOTH 0x0
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_UP 0x1
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW 0x2
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING 0x3
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_S 2
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S)
+#define ICE_AQ_VSI_OUTER_TAG_NONE 0x0
+#define ICE_AQ_VSI_OUTER_TAG_STAG 0x1
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100 0x2
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100 0x3
+#define ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT BIT(4)
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S 5
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M (0x3 << ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S)
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTUNTAGGED 0x1
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTTAGGED 0x2
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL 0x3
+#define ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC BIT(7)
+ u8 outer_vlan_reserved;
+ /* queue mapping section */
+ __le16 mapping_flags;
+#define ICE_AQ_VSI_Q_MAP_CONTIG 0x0
+#define ICE_AQ_VSI_Q_MAP_NONCONTIG BIT(0)
+ __le16 q_mapping[16];
+#define ICE_AQ_VSI_Q_S 0
+#define ICE_AQ_VSI_Q_M (0x7FF << ICE_AQ_VSI_Q_S)
+ __le16 tc_mapping[8];
+#define ICE_AQ_VSI_TC_Q_OFFSET_S 0
+#define ICE_AQ_VSI_TC_Q_OFFSET_M (0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S)
+#define ICE_AQ_VSI_TC_Q_NUM_S 11
+#define ICE_AQ_VSI_TC_Q_NUM_M (0xF << ICE_AQ_VSI_TC_Q_NUM_S)
+ /* queueing option section */
+ u8 q_opt_rss;
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S 0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI 0x0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF 0x2
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL 0x3
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ (0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ (0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_XOR (0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_JHASH (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+ u8 q_opt_tc;
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_S 0
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_M (0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S)
+#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR BIT(7)
+ u8 q_opt_flags;
+#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN BIT(0)
+ u8 q_opt_reserved[3];
+ /* outer up section */
+ __le32 outer_up_table; /* same structure and defines as ingress tbl */
+ /* section 10 */
+ __le16 sect_10_reserved;
+ /* flow director section */
+ __le16 fd_options;
+#define ICE_AQ_VSI_FD_ENABLE BIT(0)
+#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE BIT(1)
+#define ICE_AQ_VSI_FD_PROG_ENABLE BIT(3)
+ __le16 max_fd_fltr_dedicated;
+ __le16 max_fd_fltr_shared;
+ __le16 fd_def_q;
+#define ICE_AQ_VSI_FD_DEF_Q_S 0
+#define ICE_AQ_VSI_FD_DEF_Q_M (0x7FF << ICE_AQ_VSI_FD_DEF_Q_S)
+#define ICE_AQ_VSI_FD_DEF_GRP_S 12
+#define ICE_AQ_VSI_FD_DEF_GRP_M (0x7 << ICE_AQ_VSI_FD_DEF_GRP_S)
+ __le16 fd_report_opt;
+#define ICE_AQ_VSI_FD_REPORT_Q_S 0
+#define ICE_AQ_VSI_FD_REPORT_Q_M (0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S)
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_S 12
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_M (0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S)
+#define ICE_AQ_VSI_FD_DEF_DROP BIT(15)
+ /* PASID section */
+ __le32 pasid_id;
+#define ICE_AQ_VSI_PASID_ID_S 0
+#define ICE_AQ_VSI_PASID_ID_M (0xFFFFF << ICE_AQ_VSI_PASID_ID_S)
+#define ICE_AQ_VSI_PASID_ID_VALID BIT(31)
+ u8 reserved[24];
+};
+
+#define ICE_MAX_NUM_RECIPES 64
+
+/* Add/Get Recipe (indirect 0x0290/0x0292) */
+struct ice_aqc_add_get_recipe {
+ __le16 num_sub_recipes; /* Input in Add cmd, Output in Get cmd */
+ __le16 return_index; /* Input, used for Get cmd only */
+ u8 reserved[4];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_recipe_content {
+ u8 rid;
+#define ICE_AQ_RECIPE_ID_S 0
+#define ICE_AQ_RECIPE_ID_M (0x3F << ICE_AQ_RECIPE_ID_S)
+#define ICE_AQ_RECIPE_ID_IS_ROOT BIT(7)
+#define ICE_AQ_SW_ID_LKUP_IDX 0
+ u8 lkup_indx[5];
+#define ICE_AQ_RECIPE_LKUP_DATA_S 0
+#define ICE_AQ_RECIPE_LKUP_DATA_M (0x3F << ICE_AQ_RECIPE_LKUP_DATA_S)
+#define ICE_AQ_RECIPE_LKUP_IGNORE BIT(7)
+#define ICE_AQ_SW_ID_LKUP_MASK 0x00FF
+ __le16 mask[5];
+ u8 result_indx;
+#define ICE_AQ_RECIPE_RESULT_DATA_S 0
+#define ICE_AQ_RECIPE_RESULT_DATA_M (0x3F << ICE_AQ_RECIPE_RESULT_DATA_S)
+#define ICE_AQ_RECIPE_RESULT_EN BIT(7)
+ u8 rsvd0[3];
+ u8 act_ctrl_join_priority;
+ u8 act_ctrl_fwd_priority;
+#define ICE_AQ_RECIPE_FWD_PRIORITY_S 0
+#define ICE_AQ_RECIPE_FWD_PRIORITY_M (0xF << ICE_AQ_RECIPE_FWD_PRIORITY_S)
+ u8 act_ctrl;
+#define ICE_AQ_RECIPE_ACT_NEED_PASS_L2 BIT(0)
+#define ICE_AQ_RECIPE_ACT_ALLOW_PASS_L2 BIT(1)
+#define ICE_AQ_RECIPE_ACT_INV_ACT BIT(2)
+#define ICE_AQ_RECIPE_ACT_PRUNE_INDX_S 4
+#define ICE_AQ_RECIPE_ACT_PRUNE_INDX_M (0x3 << ICE_AQ_RECIPE_ACT_PRUNE_INDX_S)
+ u8 rsvd1;
+ __le32 dflt_act;
+#define ICE_AQ_RECIPE_DFLT_ACT_S 0
+#define ICE_AQ_RECIPE_DFLT_ACT_M (0x7FFFF << ICE_AQ_RECIPE_DFLT_ACT_S)
+#define ICE_AQ_RECIPE_DFLT_ACT_VALID BIT(31)
+};
+
+struct ice_aqc_recipe_data_elem {
+ u8 recipe_indx;
+ u8 resp_bits;
+#define ICE_AQ_RECIPE_WAS_UPDATED BIT(0)
+ u8 rsvd0[2];
+ u8 recipe_bitmap[8];
+ u8 rsvd1[4];
+ struct ice_aqc_recipe_content content;
+ u8 rsvd2[20];
+};
+
+/* Set/Get Recipes to Profile Association (direct 0x0291/0x0293) */
+struct ice_aqc_recipe_to_profile {
+ __le16 profile_id;
+ u8 rsvd[6];
+ DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES);
+};
+
+/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
+ */
+struct ice_aqc_sw_rules {
+	/* ops: add switch rules, referring to the number of rules.
+	 * ops: update switch rules, referring to the number of filters.
+	 * ops: remove switch rules, referring to the entry index.
+	 * ops: get switch rules, referring to the number of filters.
+ */
+ __le16 num_rules_fltr_entry_index;
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Add switch rule response:
+ * The content of the return buffer is the same as the input buffer. The
+ * status field and LUT index are updated as part of the response.
+ */
+struct ice_aqc_sw_rules_elem_hdr {
+ __le16 type; /* Switch rule type, one of T_... */
+#define ICE_AQC_SW_RULES_T_LKUP_RX 0x0
+#define ICE_AQC_SW_RULES_T_LKUP_TX 0x1
+#define ICE_AQC_SW_RULES_T_LG_ACT 0x2
+#define ICE_AQC_SW_RULES_T_VSI_LIST_SET 0x3
+#define ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR 0x4
+#define ICE_AQC_SW_RULES_T_PRUNE_LIST_SET 0x5
+#define ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR 0x6
+ __le16 status;
+} __packed __aligned(sizeof(__le16));
+
+/* Add/Update/Get/Remove lookup Rx/Tx command/response entry
+ * This structure describes the lookup rules and associated actions. "index"
+ * is returned as part of a response to a successful Add command, and can be
+ * used to identify the rule for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_lkup_rx_tx {
+ struct ice_aqc_sw_rules_elem_hdr hdr;
+
+ __le16 recipe_id;
+#define ICE_SW_RECIPE_LOGICAL_PORT_FWD 10
+ /* Source port for LOOKUP_RX and source VSI in case of LOOKUP_TX */
+ __le16 src;
+ __le32 act;
+
+ /* Bit 0:1 - Action type */
+#define ICE_SINGLE_ACT_TYPE_S 0x00
+#define ICE_SINGLE_ACT_TYPE_M (0x3 << ICE_SINGLE_ACT_TYPE_S)
+
+ /* Bit 2 - Loop back enable
+ * Bit 3 - LAN enable
+ */
+#define ICE_SINGLE_ACT_LB_ENABLE BIT(2)
+#define ICE_SINGLE_ACT_LAN_ENABLE BIT(3)
+
+ /* Action type = 0 - Forward to VSI or VSI list */
+#define ICE_SINGLE_ACT_VSI_FORWARDING 0x0
+
+#define ICE_SINGLE_ACT_VSI_ID_S 4
+#define ICE_SINGLE_ACT_VSI_ID_M (0x3FF << ICE_SINGLE_ACT_VSI_ID_S)
+#define ICE_SINGLE_ACT_VSI_LIST_ID_S 4
+#define ICE_SINGLE_ACT_VSI_LIST_ID_M (0x3FF << ICE_SINGLE_ACT_VSI_LIST_ID_S)
+ /* This bit needs to be set if action is forward to VSI list */
+#define ICE_SINGLE_ACT_VSI_LIST BIT(14)
+#define ICE_SINGLE_ACT_VALID_BIT BIT(17)
+#define ICE_SINGLE_ACT_DROP BIT(18)
+
+ /* Action type = 1 - Forward to Queue of Queue group */
+#define ICE_SINGLE_ACT_TO_Q 0x1
+#define ICE_SINGLE_ACT_Q_INDEX_S 4
+#define ICE_SINGLE_ACT_Q_INDEX_M (0x7FF << ICE_SINGLE_ACT_Q_INDEX_S)
+#define ICE_SINGLE_ACT_Q_REGION_S 15
+#define ICE_SINGLE_ACT_Q_REGION_M (0x7 << ICE_SINGLE_ACT_Q_REGION_S)
+#define ICE_SINGLE_ACT_Q_PRIORITY BIT(18)
+
+ /* Action type = 2 - Prune */
+#define ICE_SINGLE_ACT_PRUNE 0x2
+#define ICE_SINGLE_ACT_EGRESS BIT(15)
+#define ICE_SINGLE_ACT_INGRESS BIT(16)
+#define ICE_SINGLE_ACT_PRUNET BIT(17)
+ /* Bit 18 should be set to 0 for this action */
+
+ /* Action type = 2 - Pointer */
+#define ICE_SINGLE_ACT_PTR 0x2
+#define ICE_SINGLE_ACT_PTR_VAL_S 4
+#define ICE_SINGLE_ACT_PTR_VAL_M (0x1FFF << ICE_SINGLE_ACT_PTR_VAL_S)
+ /* Bit 18 should be set to 1 */
+#define ICE_SINGLE_ACT_PTR_BIT BIT(18)
+
+ /* Action type = 3 - Other actions. Last two bits
+ * are other action identifier
+ */
+#define ICE_SINGLE_ACT_OTHER_ACTS 0x3
+#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_S 17
+#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_M \
+ (0x3 << ICE_SINGLE_OTHER_ACT_IDENTIFIER_S)
+
+ /* Bit 17:18 - Defines other actions */
+ /* Other action = 0 - Mirror VSI */
+#define ICE_SINGLE_OTHER_ACT_MIRROR 0
+#define ICE_SINGLE_ACT_MIRROR_VSI_ID_S 4
+#define ICE_SINGLE_ACT_MIRROR_VSI_ID_M \
+ (0x3FF << ICE_SINGLE_ACT_MIRROR_VSI_ID_S)
+
+ /* Other action = 3 - Set Stat count */
+#define ICE_SINGLE_OTHER_ACT_STAT_COUNT 3
+#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_S 4
+#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_M \
+ (0x7F << ICE_SINGLE_ACT_STAT_COUNT_INDEX_S)
+
+ __le16 index; /* The index of the rule in the lookup table */
+ /* Length and values of the header to be matched per recipe or
+ * lookup-type
+ */
+ __le16 hdr_len;
+ u8 hdr_data[];
+} __packed __aligned(sizeof(__le16));
+
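+/* Illustrative sketch (the ice_example_* helper below is hypothetical, not
+ * part of the firmware interface): composing the act field of a lookup rule
+ * from the ICE_SINGLE_ACT_* defines above for a "forward to VSI" action.
+ */
+static inline __le32 ice_example_fwd_vsi_act(u16 vsi_num)
+{
+	u32 act = ICE_SINGLE_ACT_VSI_FORWARDING |
+		  (((u32)vsi_num << ICE_SINGLE_ACT_VSI_ID_S) &
+		   ICE_SINGLE_ACT_VSI_ID_M) |
+		  ICE_SINGLE_ACT_VALID_BIT | ICE_SINGLE_ACT_LAN_ENABLE;
+
+	return cpu_to_le32(act);
+}
+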
+/* Add/Update/Remove large action command/response entry
+ * "index" is returned as part of a response to a successful Add command, and
+ * can be used to identify the action for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_lg_act {
+ struct ice_aqc_sw_rules_elem_hdr hdr;
+
+ __le16 index; /* Index in large action table */
+ __le16 size;
+ /* Max number of large actions */
+#define ICE_MAX_LG_ACT 4
+ /* Bit 0:1 - Action type */
+#define ICE_LG_ACT_TYPE_S 0
+#define ICE_LG_ACT_TYPE_M (0x7 << ICE_LG_ACT_TYPE_S)
+
+ /* Action type = 0 - Forward to VSI or VSI list */
+#define ICE_LG_ACT_VSI_FORWARDING 0
+#define ICE_LG_ACT_VSI_ID_S 3
+#define ICE_LG_ACT_VSI_ID_M (0x3FF << ICE_LG_ACT_VSI_ID_S)
+#define ICE_LG_ACT_VSI_LIST_ID_S 3
+#define ICE_LG_ACT_VSI_LIST_ID_M (0x3FF << ICE_LG_ACT_VSI_LIST_ID_S)
+ /* This bit needs to be set if action is forward to VSI list */
+#define ICE_LG_ACT_VSI_LIST BIT(13)
+
+#define ICE_LG_ACT_VALID_BIT BIT(16)
+
+ /* Action type = 1 - Forward to Queue of Queue group */
+#define ICE_LG_ACT_TO_Q 0x1
+#define ICE_LG_ACT_Q_INDEX_S 3
+#define ICE_LG_ACT_Q_INDEX_M (0x7FF << ICE_LG_ACT_Q_INDEX_S)
+#define ICE_LG_ACT_Q_REGION_S 14
+#define ICE_LG_ACT_Q_REGION_M (0x7 << ICE_LG_ACT_Q_REGION_S)
+#define ICE_LG_ACT_Q_PRIORITY_SET BIT(17)
+
+ /* Action type = 2 - Prune */
+#define ICE_LG_ACT_PRUNE 0x2
+#define ICE_LG_ACT_EGRESS BIT(14)
+#define ICE_LG_ACT_INGRESS BIT(15)
+#define ICE_LG_ACT_PRUNET BIT(16)
+
+ /* Action type = 3 - Mirror VSI */
+#define ICE_LG_OTHER_ACT_MIRROR 0x3
+#define ICE_LG_ACT_MIRROR_VSI_ID_S 3
+#define ICE_LG_ACT_MIRROR_VSI_ID_M (0x3FF << ICE_LG_ACT_MIRROR_VSI_ID_S)
+
+ /* Action type = 5 - Generic Value */
+#define ICE_LG_ACT_GENERIC 0x5
+#define ICE_LG_ACT_GENERIC_VALUE_S 3
+#define ICE_LG_ACT_GENERIC_VALUE_M (0xFFFF << ICE_LG_ACT_GENERIC_VALUE_S)
+#define ICE_LG_ACT_GENERIC_OFFSET_S 19
+#define ICE_LG_ACT_GENERIC_OFFSET_M (0x7 << ICE_LG_ACT_GENERIC_OFFSET_S)
+#define ICE_LG_ACT_GENERIC_PRIORITY_S 22
+#define ICE_LG_ACT_GENERIC_PRIORITY_M (0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S)
+#define ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX 7
+
+ /* Action = 7 - Set Stat count */
+#define ICE_LG_ACT_STAT_COUNT 0x7
+#define ICE_LG_ACT_STAT_COUNT_S 3
+#define ICE_LG_ACT_STAT_COUNT_M (0x7F << ICE_LG_ACT_STAT_COUNT_S)
+	__le32 act[]; /* variable-length array of "size" action entries */
+} __packed __aligned(sizeof(__le16));
+
+/* Add/Update/Remove VSI list command/response entry
+ * "index" is returned as part of a response to a successful Add command, and
+ * can be used to identify the VSI list for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_vsi_list {
+ struct ice_aqc_sw_rules_elem_hdr hdr;
+
+ __le16 index; /* Index of VSI/Prune list */
+ __le16 number_vsi;
+ __le16 vsi[]; /* Array of number_vsi VSI numbers */
+} __packed __aligned(sizeof(__le16));
+
+/* Query PFC Mode (direct 0x0302)
+ * Set PFC Mode (direct 0x0303)
+ */
+struct ice_aqc_set_query_pfc_mode {
+ u8 pfc_mode;
+/* For Query Command response, reserved in all other cases */
+#define ICE_AQC_PFC_VLAN_BASED_PFC 1
+#define ICE_AQC_PFC_DSCP_BASED_PFC 2
+ u8 rsvd[15];
+};
+/* Get Default Topology (indirect 0x0400) */
+struct ice_aqc_get_topo {
+ u8 port_num;
+ u8 num_branches;
+ __le16 reserved1;
+ __le32 reserved2;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Update TSE (indirect 0x0403)
+ * Get TSE (indirect 0x0404)
+ * Add TSE (indirect 0x0401)
+ * Delete TSE (indirect 0x040F)
+ * Move TSE (indirect 0x0408)
+ * Suspend Nodes (indirect 0x0409)
+ * Resume Nodes (indirect 0x040A)
+ */
+struct ice_aqc_sched_elem_cmd {
+ __le16 num_elem_req; /* Used by commands */
+ __le16 num_elem_resp; /* Used by responses */
+ __le32 reserved;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_txsched_move_grp_info_hdr {
+ __le32 src_parent_teid;
+ __le32 dest_parent_teid;
+ __le16 num_elems;
+ __le16 reserved;
+};
+
+struct ice_aqc_move_elem {
+ struct ice_aqc_txsched_move_grp_info_hdr hdr;
+ __le32 teid[];
+};
+
+struct ice_aqc_elem_info_bw {
+ __le16 bw_profile_idx;
+ __le16 bw_alloc;
+};
+
+struct ice_aqc_txsched_elem {
+ u8 elem_type; /* Special field, reserved for some aq calls */
+#define ICE_AQC_ELEM_TYPE_UNDEFINED 0x0
+#define ICE_AQC_ELEM_TYPE_ROOT_PORT 0x1
+#define ICE_AQC_ELEM_TYPE_TC 0x2
+#define ICE_AQC_ELEM_TYPE_SE_GENERIC 0x3
+#define ICE_AQC_ELEM_TYPE_ENTRY_POINT 0x4
+#define ICE_AQC_ELEM_TYPE_LEAF 0x5
+#define ICE_AQC_ELEM_TYPE_SE_PADDED 0x6
+ u8 valid_sections;
+#define ICE_AQC_ELEM_VALID_GENERIC BIT(0)
+#define ICE_AQC_ELEM_VALID_CIR BIT(1)
+#define ICE_AQC_ELEM_VALID_EIR BIT(2)
+#define ICE_AQC_ELEM_VALID_SHARED BIT(3)
+ u8 generic;
+#define ICE_AQC_ELEM_GENERIC_MODE_M 0x1
+#define ICE_AQC_ELEM_GENERIC_PRIO_S 0x1
+#define ICE_AQC_ELEM_GENERIC_PRIO_M (0x7 << ICE_AQC_ELEM_GENERIC_PRIO_S)
+#define ICE_AQC_ELEM_GENERIC_SP_S 0x4
+#define ICE_AQC_ELEM_GENERIC_SP_M (0x1 << ICE_AQC_ELEM_GENERIC_SP_S)
+#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S 0x5
+#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_M \
+ (0x3 << ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S)
+ u8 flags; /* Special field, reserved for some aq calls */
+#define ICE_AQC_ELEM_FLAG_SUSPEND_M 0x1
+ struct ice_aqc_elem_info_bw cir_bw;
+ struct ice_aqc_elem_info_bw eir_bw;
+ __le16 srl_id;
+ __le16 reserved2;
+};
+
+struct ice_aqc_txsched_elem_data {
+ __le32 parent_teid;
+ __le32 node_teid;
+ struct ice_aqc_txsched_elem data;
+};
+
+struct ice_aqc_txsched_topo_grp_info_hdr {
+ __le32 parent_teid;
+ __le16 num_elems;
+ __le16 reserved2;
+};
+
+struct ice_aqc_add_elem {
+ struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+ struct ice_aqc_txsched_elem_data generic[];
+};
+
+struct ice_aqc_get_topo_elem {
+ struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+ struct ice_aqc_txsched_elem_data
+ generic[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+};
+
+struct ice_aqc_delete_elem {
+ struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+ __le32 teid[];
+};
+
+/* Query Port ETS (indirect 0x040E)
+ *
+ * This indirect command is used to query port TC node configuration.
+ */
+struct ice_aqc_query_port_ets {
+ __le32 port_teid;
+ __le32 reserved;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_port_ets_elem {
+ u8 tc_valid_bits;
+ u8 reserved[3];
+ /* 3 bits for UP per TC 0-7, 4th byte reserved */
+ __le32 up2tc;
+ u8 tc_bw_share[8];
+ __le32 port_eir_prof_id;
+ __le32 port_cir_prof_id;
+ /* 3 bits per Node priority to TC 0-7, 4th byte reserved */
+ __le32 tc_node_prio;
+#define ICE_TC_NODE_PRIO_S 0x4
+ u8 reserved1[4];
+ __le32 tc_node_teid[8]; /* Used for response, reserved in command */
+};
+
+/* Rate limiting profile for
+ * Add RL profile (indirect 0x0410)
+ * Query RL profile (indirect 0x0411)
+ * Remove RL profile (indirect 0x0415)
+ * These indirect commands act on one or more
+ * RL profiles with the specified data.
+ */
+struct ice_aqc_rl_profile {
+ __le16 num_profiles;
+ __le16 num_processed; /* Only for response. Reserved in Command. */
+ u8 reserved[4];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_rl_profile_elem {
+ u8 level;
+ u8 flags;
+#define ICE_AQC_RL_PROFILE_TYPE_S 0x0
+#define ICE_AQC_RL_PROFILE_TYPE_M (0x3 << ICE_AQC_RL_PROFILE_TYPE_S)
+#define ICE_AQC_RL_PROFILE_TYPE_CIR 0
+#define ICE_AQC_RL_PROFILE_TYPE_EIR 1
+#define ICE_AQC_RL_PROFILE_TYPE_SRL 2
+/* The following flag is used for Query RL Profile Data */
+#define ICE_AQC_RL_PROFILE_INVAL_S 0x7
+#define ICE_AQC_RL_PROFILE_INVAL_M (0x1 << ICE_AQC_RL_PROFILE_INVAL_S)
+
+ __le16 profile_id;
+ __le16 max_burst_size;
+ __le16 rl_multiply;
+ __le16 wake_up_calc;
+ __le16 rl_encode;
+};
+
+/* Query Scheduler Resource Allocation (indirect 0x0412)
+ * This indirect command retrieves the scheduler resources allocated by
+ * EMP Firmware to the given PF.
+ */
+struct ice_aqc_query_txsched_res {
+ u8 reserved[8];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_generic_sched_props {
+ __le16 phys_levels;
+ __le16 logical_levels;
+ u8 flattening_bitmap;
+ u8 max_device_cgds;
+ u8 max_pf_cgds;
+ u8 rsvd0;
+ __le16 rdma_qsets;
+ u8 rsvd1[22];
+};
+
+struct ice_aqc_layer_props {
+ u8 logical_layer;
+ u8 chunk_size;
+ __le16 max_device_nodes;
+ __le16 max_pf_nodes;
+ u8 rsvd0[4];
+ __le16 max_sibl_grp_sz;
+ __le16 max_cir_rl_profiles;
+ __le16 max_eir_rl_profiles;
+ __le16 max_srl_profiles;
+ u8 rsvd1[14];
+};
+
+struct ice_aqc_query_txsched_res_resp {
+ struct ice_aqc_generic_sched_props sched_props;
+ struct ice_aqc_layer_props layer_props[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+};
+
+/* Get PHY capabilities (indirect 0x0600) */
+struct ice_aqc_get_phy_caps {
+ u8 lport_num;
+ u8 reserved;
+ __le16 param0;
+ /* 18.0 - Report qualified modules */
+#define ICE_AQC_GET_PHY_RQM BIT(0)
+ /* 18.1 - 18.3 : Report mode
+ * 000b - Report NVM capabilities
+ * 001b - Report topology capabilities
+ * 010b - Report SW configured
+ * 100b - Report default capabilities
+ */
+#define ICE_AQC_REPORT_MODE_S 1
+#define ICE_AQC_REPORT_MODE_M (7 << ICE_AQC_REPORT_MODE_S)
+#define ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA 0
+#define ICE_AQC_REPORT_TOPO_CAP_MEDIA BIT(1)
+#define ICE_AQC_REPORT_ACTIVE_CFG BIT(2)
+#define ICE_AQC_REPORT_DFLT_CFG BIT(3)
+ __le32 reserved1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* These are the PHY type #defines (extended).
+ * The first set of defines is for phy_type_low.
+ */
+#define ICE_PHY_TYPE_LOW_100BASE_TX BIT_ULL(0)
+#define ICE_PHY_TYPE_LOW_100M_SGMII BIT_ULL(1)
+#define ICE_PHY_TYPE_LOW_1000BASE_T BIT_ULL(2)
+#define ICE_PHY_TYPE_LOW_1000BASE_SX BIT_ULL(3)
+#define ICE_PHY_TYPE_LOW_1000BASE_LX BIT_ULL(4)
+#define ICE_PHY_TYPE_LOW_1000BASE_KX BIT_ULL(5)
+#define ICE_PHY_TYPE_LOW_1G_SGMII BIT_ULL(6)
+#define ICE_PHY_TYPE_LOW_2500BASE_T BIT_ULL(7)
+#define ICE_PHY_TYPE_LOW_2500BASE_X BIT_ULL(8)
+#define ICE_PHY_TYPE_LOW_2500BASE_KX BIT_ULL(9)
+#define ICE_PHY_TYPE_LOW_5GBASE_T BIT_ULL(10)
+#define ICE_PHY_TYPE_LOW_5GBASE_KR BIT_ULL(11)
+#define ICE_PHY_TYPE_LOW_10GBASE_T BIT_ULL(12)
+#define ICE_PHY_TYPE_LOW_10G_SFI_DA BIT_ULL(13)
+#define ICE_PHY_TYPE_LOW_10GBASE_SR BIT_ULL(14)
+#define ICE_PHY_TYPE_LOW_10GBASE_LR BIT_ULL(15)
+#define ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 BIT_ULL(16)
+#define ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC BIT_ULL(17)
+#define ICE_PHY_TYPE_LOW_10G_SFI_C2C BIT_ULL(18)
+#define ICE_PHY_TYPE_LOW_25GBASE_T BIT_ULL(19)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR BIT_ULL(20)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR_S BIT_ULL(21)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR1 BIT_ULL(22)
+#define ICE_PHY_TYPE_LOW_25GBASE_SR BIT_ULL(23)
+#define ICE_PHY_TYPE_LOW_25GBASE_LR BIT_ULL(24)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR BIT_ULL(25)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR_S BIT_ULL(26)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR1 BIT_ULL(27)
+#define ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC BIT_ULL(28)
+#define ICE_PHY_TYPE_LOW_25G_AUI_C2C BIT_ULL(29)
+#define ICE_PHY_TYPE_LOW_40GBASE_CR4 BIT_ULL(30)
+#define ICE_PHY_TYPE_LOW_40GBASE_SR4 BIT_ULL(31)
+#define ICE_PHY_TYPE_LOW_40GBASE_LR4 BIT_ULL(32)
+#define ICE_PHY_TYPE_LOW_40GBASE_KR4 BIT_ULL(33)
+#define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC BIT_ULL(34)
+#define ICE_PHY_TYPE_LOW_40G_XLAUI BIT_ULL(35)
+#define ICE_PHY_TYPE_LOW_50GBASE_CR2 BIT_ULL(36)
+#define ICE_PHY_TYPE_LOW_50GBASE_SR2 BIT_ULL(37)
+#define ICE_PHY_TYPE_LOW_50GBASE_LR2 BIT_ULL(38)
+#define ICE_PHY_TYPE_LOW_50GBASE_KR2 BIT_ULL(39)
+#define ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC BIT_ULL(40)
+#define ICE_PHY_TYPE_LOW_50G_LAUI2 BIT_ULL(41)
+#define ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC BIT_ULL(42)
+#define ICE_PHY_TYPE_LOW_50G_AUI2 BIT_ULL(43)
+#define ICE_PHY_TYPE_LOW_50GBASE_CP BIT_ULL(44)
+#define ICE_PHY_TYPE_LOW_50GBASE_SR BIT_ULL(45)
+#define ICE_PHY_TYPE_LOW_50GBASE_FR BIT_ULL(46)
+#define ICE_PHY_TYPE_LOW_50GBASE_LR BIT_ULL(47)
+#define ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 BIT_ULL(48)
+#define ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC BIT_ULL(49)
+#define ICE_PHY_TYPE_LOW_50G_AUI1 BIT_ULL(50)
+#define ICE_PHY_TYPE_LOW_100GBASE_CR4 BIT_ULL(51)
+#define ICE_PHY_TYPE_LOW_100GBASE_SR4 BIT_ULL(52)
+#define ICE_PHY_TYPE_LOW_100GBASE_LR4 BIT_ULL(53)
+#define ICE_PHY_TYPE_LOW_100GBASE_KR4 BIT_ULL(54)
+#define ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC BIT_ULL(55)
+#define ICE_PHY_TYPE_LOW_100G_CAUI4 BIT_ULL(56)
+#define ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC BIT_ULL(57)
+#define ICE_PHY_TYPE_LOW_100G_AUI4 BIT_ULL(58)
+#define ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 BIT_ULL(59)
+#define ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 BIT_ULL(60)
+#define ICE_PHY_TYPE_LOW_100GBASE_CP2 BIT_ULL(61)
+#define ICE_PHY_TYPE_LOW_100GBASE_SR2 BIT_ULL(62)
+#define ICE_PHY_TYPE_LOW_100GBASE_DR BIT_ULL(63)
+#define ICE_PHY_TYPE_LOW_MAX_INDEX 63
+/* The second set of defines is for phy_type_high. */
+#define ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 BIT_ULL(0)
+#define ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC BIT_ULL(1)
+#define ICE_PHY_TYPE_HIGH_100G_CAUI2 BIT_ULL(2)
+#define ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC BIT_ULL(3)
+#define ICE_PHY_TYPE_HIGH_100G_AUI2 BIT_ULL(4)
+#define ICE_PHY_TYPE_HIGH_MAX_INDEX 5
+
+struct ice_aqc_get_phy_caps_data {
+ __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+ __le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
+ u8 caps;
+#define ICE_AQC_PHY_EN_TX_LINK_PAUSE BIT(0)
+#define ICE_AQC_PHY_EN_RX_LINK_PAUSE BIT(1)
+#define ICE_AQC_PHY_LOW_POWER_MODE BIT(2)
+#define ICE_AQC_PHY_EN_LINK BIT(3)
+#define ICE_AQC_PHY_AN_MODE BIT(4)
+#define ICE_AQC_GET_PHY_EN_MOD_QUAL BIT(5)
+#define ICE_AQC_PHY_EN_AUTO_FEC BIT(7)
+#define ICE_AQC_PHY_CAPS_MASK ICE_M(0xff, 0)
+ u8 low_power_ctrl_an;
+#define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG BIT(0)
+#define ICE_AQC_PHY_AN_EN_CLAUSE28 BIT(1)
+#define ICE_AQC_PHY_AN_EN_CLAUSE73 BIT(2)
+#define ICE_AQC_PHY_AN_EN_CLAUSE37 BIT(3)
+ __le16 eee_cap;
+#define ICE_AQC_PHY_EEE_EN_100BASE_TX BIT(0)
+#define ICE_AQC_PHY_EEE_EN_1000BASE_T BIT(1)
+#define ICE_AQC_PHY_EEE_EN_10GBASE_T BIT(2)
+#define ICE_AQC_PHY_EEE_EN_1000BASE_KX BIT(3)
+#define ICE_AQC_PHY_EEE_EN_10GBASE_KR BIT(4)
+#define ICE_AQC_PHY_EEE_EN_25GBASE_KR BIT(5)
+#define ICE_AQC_PHY_EEE_EN_40GBASE_KR4 BIT(6)
+ __le16 eeer_value;
+ u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */
+ u8 phy_fw_ver[8];
+ u8 link_fec_options;
+#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN BIT(0)
+#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ BIT(1)
+#define ICE_AQC_PHY_FEC_25G_RS_528_REQ BIT(2)
+#define ICE_AQC_PHY_FEC_25G_KR_REQ BIT(3)
+#define ICE_AQC_PHY_FEC_25G_RS_544_REQ BIT(4)
+#define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN BIT(6)
+#define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN BIT(7)
+#define ICE_AQC_PHY_FEC_MASK ICE_M(0xdf, 0)
+ u8 module_compliance_enforcement;
+#define ICE_AQC_MOD_ENFORCE_STRICT_MODE BIT(0)
+ u8 extended_compliance_code;
+#define ICE_MODULE_TYPE_TOTAL_BYTE 3
+ u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
+#define ICE_AQC_MOD_TYPE_BYTE0_SFP_PLUS 0xA0
+#define ICE_AQC_MOD_TYPE_BYTE0_QSFP_PLUS 0x80
+#define ICE_AQC_MOD_TYPE_IDENT 1
+#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE BIT(0)
+#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE BIT(1)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_SR BIT(4)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LR BIT(5)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LRM BIT(6)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_ER BIT(7)
+#define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS 0xA0
+#define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS 0x86
+ u8 qualified_module_count;
+ u8 rsvd2[7]; /* Bytes 47:41 reserved */
+#define ICE_AQC_QUAL_MOD_COUNT_MAX 16
+ struct {
+ u8 v_oui[3];
+ u8 rsvd3;
+ u8 v_part[16];
+ __le32 v_rev;
+ __le64 rsvd4;
+ } qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX];
+};
+
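+/* Illustrative sketch (hypothetical helper, not part of the firmware
+ * interface): testing a single PHY type bit from the response above. The
+ * phy_type_low word is little-endian and must be converted first.
+ */
+static inline bool
+ice_example_supports_25g_kr(const struct ice_aqc_get_phy_caps_data *caps)
+{
+	return !!(le64_to_cpu(caps->phy_type_low) &
+		  ICE_PHY_TYPE_LOW_25GBASE_KR);
+}
+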
+/* Set PHY capabilities (direct 0x0601)
+ * NOTE: This command must be followed by setup link and restart auto-neg
+ */
+struct ice_aqc_set_phy_cfg {
+ u8 lport_num;
+ u8 reserved[7];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Set PHY config command data structure */
+struct ice_aqc_set_phy_cfg_data {
+ __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+ __le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
+ u8 caps;
+#define ICE_AQ_PHY_ENA_VALID_MASK ICE_M(0xef, 0)
+#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY BIT(0)
+#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY BIT(1)
+#define ICE_AQ_PHY_ENA_LOW_POWER BIT(2)
+#define ICE_AQ_PHY_ENA_LINK BIT(3)
+#define ICE_AQ_PHY_ENA_AUTO_LINK_UPDT BIT(5)
+#define ICE_AQ_PHY_ENA_LESM BIT(6)
+#define ICE_AQ_PHY_ENA_AUTO_FEC BIT(7)
+ u8 low_power_ctrl_an;
+ __le16 eee_cap; /* Value from ice_aqc_get_phy_caps */
+ __le16 eeer_value;
+ u8 link_fec_opt; /* Use defines from ice_aqc_get_phy_caps */
+ u8 module_compliance_enforcement;
+};
+
+/* Set MAC Config command data structure (direct 0x0603) */
+struct ice_aqc_set_mac_cfg {
+ __le16 max_frame_size;
+ u8 params;
+#define ICE_AQ_SET_MAC_PACE_S 3
+#define ICE_AQ_SET_MAC_PACE_M (0xF << ICE_AQ_SET_MAC_PACE_S)
+#define ICE_AQ_SET_MAC_PACE_TYPE_M BIT(7)
+#define ICE_AQ_SET_MAC_PACE_TYPE_RATE 0
+#define ICE_AQ_SET_MAC_PACE_TYPE_FIXED ICE_AQ_SET_MAC_PACE_TYPE_M
+ u8 tx_tmr_priority;
+ __le16 tx_tmr_value;
+ __le16 fc_refresh_threshold;
+ u8 drop_opts;
+#define ICE_AQ_SET_MAC_AUTO_DROP_MASK BIT(0)
+#define ICE_AQ_SET_MAC_AUTO_DROP_NONE 0
+#define ICE_AQ_SET_MAC_AUTO_DROP_BLOCKING_PKTS BIT(0)
+ u8 reserved[7];
+};
+
+/* Restart AN command data structure (direct 0x0605)
+ * Also used for response, with only the lport_num field present.
+ */
+struct ice_aqc_restart_an {
+ u8 lport_num;
+ u8 reserved;
+ u8 cmd_flags;
+#define ICE_AQC_RESTART_AN_LINK_RESTART BIT(1)
+#define ICE_AQC_RESTART_AN_LINK_ENABLE BIT(2)
+ u8 reserved2[13];
+};
+
+/* Get link status (indirect 0x0607), also used for Link Status Event */
+struct ice_aqc_get_link_status {
+ u8 lport_num;
+ u8 reserved;
+ __le16 cmd_flags;
+#define ICE_AQ_LSE_M 0x3
+#define ICE_AQ_LSE_NOP 0x0
+#define ICE_AQ_LSE_DIS 0x2
+#define ICE_AQ_LSE_ENA 0x3
+ /* only response uses this flag */
+#define ICE_AQ_LSE_IS_ENABLED 0x1
+ __le32 reserved2;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Get link status response data structure, also used for Link Status Event */
+struct ice_aqc_get_link_status_data {
+ u8 topo_media_conflict;
+#define ICE_AQ_LINK_TOPO_CONFLICT BIT(0)
+#define ICE_AQ_LINK_MEDIA_CONFLICT BIT(1)
+#define ICE_AQ_LINK_TOPO_CORRUPT BIT(2)
+#define ICE_AQ_LINK_TOPO_UNREACH_PRT BIT(4)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_PRT BIT(5)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA BIT(6)
+#define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA BIT(7)
+ u8 link_cfg_err;
+#define ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED BIT(5)
+#define ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE BIT(6)
+#define ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT BIT(7)
+ u8 link_info;
+#define ICE_AQ_LINK_UP BIT(0) /* Link Status */
+#define ICE_AQ_LINK_FAULT BIT(1)
+#define ICE_AQ_LINK_FAULT_TX BIT(2)
+#define ICE_AQ_LINK_FAULT_RX BIT(3)
+#define ICE_AQ_LINK_FAULT_REMOTE BIT(4)
+#define ICE_AQ_LINK_UP_PORT BIT(5) /* External Port Link Status */
+#define ICE_AQ_MEDIA_AVAILABLE BIT(6)
+#define ICE_AQ_SIGNAL_DETECT BIT(7)
+ u8 an_info;
+#define ICE_AQ_AN_COMPLETED BIT(0)
+#define ICE_AQ_LP_AN_ABILITY BIT(1)
+#define ICE_AQ_PD_FAULT BIT(2) /* Parallel Detection Fault */
+#define ICE_AQ_FEC_EN BIT(3)
+#define ICE_AQ_PHY_LOW_POWER BIT(4) /* Low Power State */
+#define ICE_AQ_LINK_PAUSE_TX BIT(5)
+#define ICE_AQ_LINK_PAUSE_RX BIT(6)
+#define ICE_AQ_QUALIFIED_MODULE BIT(7)
+ u8 ext_info;
+#define ICE_AQ_LINK_PHY_TEMP_ALARM BIT(0)
+#define ICE_AQ_LINK_EXCESSIVE_ERRORS BIT(1) /* Excessive Link Errors */
+ /* Port Tx Suspended */
+#define ICE_AQ_LINK_TX_S 2
+#define ICE_AQ_LINK_TX_M (0x03 << ICE_AQ_LINK_TX_S)
+#define ICE_AQ_LINK_TX_ACTIVE 0
+#define ICE_AQ_LINK_TX_DRAINED 1
+#define ICE_AQ_LINK_TX_FLUSHED 3
+ u8 reserved2;
+ __le16 max_frame_size;
+ u8 cfg;
+#define ICE_AQ_LINK_25G_KR_FEC_EN BIT(0)
+#define ICE_AQ_LINK_25G_RS_528_FEC_EN BIT(1)
+#define ICE_AQ_LINK_25G_RS_544_FEC_EN BIT(2)
+#define ICE_AQ_FEC_MASK ICE_M(0x7, 0)
+ /* Pacing Config */
+#define ICE_AQ_CFG_PACING_S 3
+#define ICE_AQ_CFG_PACING_M (0xF << ICE_AQ_CFG_PACING_S)
+#define ICE_AQ_CFG_PACING_TYPE_M BIT(7)
+#define ICE_AQ_CFG_PACING_TYPE_AVG 0
+#define ICE_AQ_CFG_PACING_TYPE_FIXED ICE_AQ_CFG_PACING_TYPE_M
+ /* External Device Power Ability */
+ u8 power_desc;
+#define ICE_AQ_PWR_CLASS_M 0x3F
+#define ICE_AQ_LINK_PWR_BASET_LOW_HIGH 0
+#define ICE_AQ_LINK_PWR_BASET_HIGH 1
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_1 0
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_2 1
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_3 2
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_4 3
+ __le16 link_speed;
+#define ICE_AQ_LINK_SPEED_M 0x7FF
+#define ICE_AQ_LINK_SPEED_10MB BIT(0)
+#define ICE_AQ_LINK_SPEED_100MB BIT(1)
+#define ICE_AQ_LINK_SPEED_1000MB BIT(2)
+#define ICE_AQ_LINK_SPEED_2500MB BIT(3)
+#define ICE_AQ_LINK_SPEED_5GB BIT(4)
+#define ICE_AQ_LINK_SPEED_10GB BIT(5)
+#define ICE_AQ_LINK_SPEED_20GB BIT(6)
+#define ICE_AQ_LINK_SPEED_25GB BIT(7)
+#define ICE_AQ_LINK_SPEED_40GB BIT(8)
+#define ICE_AQ_LINK_SPEED_50GB BIT(9)
+#define ICE_AQ_LINK_SPEED_100GB BIT(10)
+#define ICE_AQ_LINK_SPEED_UNKNOWN BIT(15)
+ __le32 reserved3; /* Aligns next field to 8-byte boundary */
+ __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+ __le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
+};
+
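+/* Illustrative sketch (hypothetical helpers, not part of the firmware
+ * interface): decoding link state and a few of the speed bits from the
+ * response above; the speed mapping is deliberately partial.
+ */
+static inline bool
+ice_example_link_is_up(const struct ice_aqc_get_link_status_data *link)
+{
+	return !!(link->link_info & ICE_AQ_LINK_UP);
+}
+
+static inline u32
+ice_example_link_speed_mbps(const struct ice_aqc_get_link_status_data *link)
+{
+	u16 speed = le16_to_cpu(link->link_speed) & ICE_AQ_LINK_SPEED_M;
+
+	if (speed & ICE_AQ_LINK_SPEED_100GB)
+		return 100000;
+	if (speed & ICE_AQ_LINK_SPEED_25GB)
+		return 25000;
+	if (speed & ICE_AQ_LINK_SPEED_10GB)
+		return 10000;
+	return 0;	/* other speed bits elided in this sketch */
+}
+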
+/* Set event mask command (direct 0x0613) */
+struct ice_aqc_set_event_mask {
+ u8 lport_num;
+ u8 reserved[7];
+ __le16 event_mask;
+#define ICE_AQ_LINK_EVENT_UPDOWN BIT(1)
+#define ICE_AQ_LINK_EVENT_MEDIA_NA BIT(2)
+#define ICE_AQ_LINK_EVENT_LINK_FAULT BIT(3)
+#define ICE_AQ_LINK_EVENT_PHY_TEMP_ALARM BIT(4)
+#define ICE_AQ_LINK_EVENT_EXCESSIVE_ERRORS BIT(5)
+#define ICE_AQ_LINK_EVENT_SIGNAL_DETECT BIT(6)
+#define ICE_AQ_LINK_EVENT_AN_COMPLETED BIT(7)
+#define ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL BIT(8)
+#define ICE_AQ_LINK_EVENT_PORT_TX_SUSPENDED BIT(9)
+#define ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL BIT(12)
+ u8 reserved1[6];
+};
+
+/* Set MAC Loopback command (direct 0x0620) */
+struct ice_aqc_set_mac_lb {
+ u8 lb_mode;
+#define ICE_AQ_MAC_LB_EN BIT(0)
+#define ICE_AQ_MAC_LB_OSC_CLK BIT(1)
+ u8 reserved[15];
+};
+
+struct ice_aqc_link_topo_params {
+ u8 lport_num;
+ u8 lport_num_valid;
+#define ICE_AQC_LINK_TOPO_PORT_NUM_VALID BIT(0)
+ u8 node_type_ctx;
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_S 0
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_M (0xF << ICE_AQC_LINK_TOPO_NODE_TYPE_S)
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_PHY 0
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL 1
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_MUX_CTRL 2
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_LED_CTRL 3
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_LED 4
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_THERMAL 5
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE 6
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_MEZZ 7
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_ID_EEPROM 8
+#define ICE_AQC_LINK_TOPO_NODE_CTX_S 4
+#define ICE_AQC_LINK_TOPO_NODE_CTX_M \
+ (0xF << ICE_AQC_LINK_TOPO_NODE_CTX_S)
+#define ICE_AQC_LINK_TOPO_NODE_CTX_GLOBAL 0
+#define ICE_AQC_LINK_TOPO_NODE_CTX_BOARD 1
+#define ICE_AQC_LINK_TOPO_NODE_CTX_PORT 2
+#define ICE_AQC_LINK_TOPO_NODE_CTX_NODE 3
+#define ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED 4
+#define ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE 5
+ u8 index;
+};
+
+struct ice_aqc_link_topo_addr {
+ struct ice_aqc_link_topo_params topo_params;
+ __le16 handle;
+#define ICE_AQC_LINK_TOPO_HANDLE_S 0
+#define ICE_AQC_LINK_TOPO_HANDLE_M (0x3FF << ICE_AQC_LINK_TOPO_HANDLE_S)
+/* Used to decode the handle field */
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_M BIT(9)
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_LOM BIT(9)
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_MEZZ 0
+#define ICE_AQC_LINK_TOPO_HANDLE_NODE_S 0
+/* In case of a Mezzanine type */
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_NODE_M \
+ (0x3F << ICE_AQC_LINK_TOPO_HANDLE_NODE_S)
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_S 6
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_M (0x7 << ICE_AQC_LINK_TOPO_HANDLE_MEZZ_S)
+/* In case of a LOM type */
+#define ICE_AQC_LINK_TOPO_HANDLE_LOM_NODE_M \
+ (0x1FF << ICE_AQC_LINK_TOPO_HANDLE_NODE_S)
+};
+
+/* Get Link Topology Handle (direct, 0x06E0) */
+struct ice_aqc_get_link_topo {
+ struct ice_aqc_link_topo_addr addr;
+ u8 node_part_num;
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575 0x21
+ u8 rsvd[9];
+};
+
+/* Read/Write I2C (direct, 0x06E2/0x06E3) */
+struct ice_aqc_i2c {
+ struct ice_aqc_link_topo_addr topo_addr;
+ __le16 i2c_addr;
+ u8 i2c_params;
+#define ICE_AQC_I2C_DATA_SIZE_M GENMASK(3, 0)
+#define ICE_AQC_I2C_USE_REPEATED_START BIT(7)
+
+ u8 rsvd;
+ __le16 i2c_bus_addr;
+ u8 i2c_data[4]; /* Used only by write command, reserved in read. */
+};
+
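+/* Illustrative sketch (hypothetical helper, not part of the firmware
+ * interface): packing the transfer length into i2c_params with FIELD_PREP()
+ * from linux/bitfield.h, which this example assumes is available.
+ */
+static inline u8 ice_example_i2c_params(u8 data_size, bool repeated_start)
+{
+	u8 params = FIELD_PREP(ICE_AQC_I2C_DATA_SIZE_M, data_size);
+
+	if (repeated_start)
+		params |= ICE_AQC_I2C_USE_REPEATED_START;
+
+	return params;
+}
+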
+/* Read I2C Response (direct, 0x06E2) */
+struct ice_aqc_read_i2c_resp {
+ u8 i2c_data[16];
+};
+
+/* Set Port Identification LED (direct, 0x06E9) */
+struct ice_aqc_set_port_id_led {
+ u8 lport_num;
+ u8 lport_num_valid;
+ u8 ident_mode;
+#define ICE_AQC_PORT_IDENT_LED_BLINK BIT(0)
+#define ICE_AQC_PORT_IDENT_LED_ORIG 0
+ u8 rsvd[13];
+};
+
+/* Get Port Options (indirect, 0x06EA) */
+struct ice_aqc_get_port_options {
+ u8 lport_num;
+ u8 lport_num_valid;
+ u8 port_options_count;
+#define ICE_AQC_PORT_OPT_COUNT_M GENMASK(3, 0)
+#define ICE_AQC_PORT_OPT_MAX 16
+
+ u8 innermost_phy_index;
+ u8 port_options;
+#define ICE_AQC_PORT_OPT_ACTIVE_M GENMASK(3, 0)
+#define ICE_AQC_PORT_OPT_VALID BIT(7)
+
+ u8 pending_port_option_status;
+#define ICE_AQC_PENDING_PORT_OPT_IDX_M GENMASK(3, 0)
+#define ICE_AQC_PENDING_PORT_OPT_VALID BIT(7)
+
+ u8 rsvd[2];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_get_port_options_elem {
+ u8 pmd;
+#define ICE_AQC_PORT_OPT_PMD_COUNT_M GENMASK(3, 0)
+
+ u8 max_lane_speed;
+#define ICE_AQC_PORT_OPT_MAX_LANE_M GENMASK(3, 0)
+#define ICE_AQC_PORT_OPT_MAX_LANE_100M 0
+#define ICE_AQC_PORT_OPT_MAX_LANE_1G 1
+#define ICE_AQC_PORT_OPT_MAX_LANE_2500M 2
+#define ICE_AQC_PORT_OPT_MAX_LANE_5G 3
+#define ICE_AQC_PORT_OPT_MAX_LANE_10G 4
+#define ICE_AQC_PORT_OPT_MAX_LANE_25G 5
+#define ICE_AQC_PORT_OPT_MAX_LANE_50G 6
+#define ICE_AQC_PORT_OPT_MAX_LANE_100G 7
+
+ u8 global_scid[2];
+ u8 phy_scid[2];
+ u8 pf2port_cid[2];
+};
+
+/* Set Port Option (direct, 0x06EB) */
+struct ice_aqc_set_port_option {
+ u8 lport_num;
+ u8 lport_num_valid;
+ u8 selected_port_option;
+ u8 rsvd[13];
+};
+
+/* Set/Get GPIO (direct, 0x06EC/0x06ED) */
+struct ice_aqc_gpio {
+ __le16 gpio_ctrl_handle;
+#define ICE_AQC_GPIO_HANDLE_S 0
+#define ICE_AQC_GPIO_HANDLE_M (0x3FF << ICE_AQC_GPIO_HANDLE_S)
+ u8 gpio_num;
+ u8 gpio_val;
+ u8 rsvd[12];
+};
+
+/* Read/Write SFF EEPROM command (indirect 0x06EE) */
+struct ice_aqc_sff_eeprom {
+ u8 lport_num;
+ u8 lport_num_valid;
+#define ICE_AQC_SFF_PORT_NUM_VALID BIT(0)
+ __le16 i2c_bus_addr;
+#define ICE_AQC_SFF_I2CBUS_7BIT_M 0x7F
+#define ICE_AQC_SFF_I2CBUS_10BIT_M 0x3FF
+#define ICE_AQC_SFF_I2CBUS_TYPE_M BIT(10)
+#define ICE_AQC_SFF_I2CBUS_TYPE_7BIT 0
+#define ICE_AQC_SFF_I2CBUS_TYPE_10BIT ICE_AQC_SFF_I2CBUS_TYPE_M
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_S 11
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_M (0x3 << ICE_AQC_SFF_SET_EEPROM_PAGE_S)
+#define ICE_AQC_SFF_NO_PAGE_CHANGE 0
+#define ICE_AQC_SFF_SET_23_ON_MISMATCH 1
+#define ICE_AQC_SFF_SET_22_ON_MISMATCH 2
+#define ICE_AQC_SFF_IS_WRITE BIT(15)
+ __le16 i2c_mem_addr;
+ __le16 eeprom_page;
+#define ICE_AQC_SFF_EEPROM_BANK_S 0
+#define ICE_AQC_SFF_EEPROM_BANK_M (0xFF << ICE_AQC_SFF_EEPROM_BANK_S)
+#define ICE_AQC_SFF_EEPROM_PAGE_S 8
+#define ICE_AQC_SFF_EEPROM_PAGE_M (0xFF << ICE_AQC_SFF_EEPROM_PAGE_S)
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* NVM Read command (indirect 0x0701)
+ * NVM Erase commands (direct 0x0702)
+ * NVM Update commands (indirect 0x0703)
+ */
+struct ice_aqc_nvm {
+#define ICE_AQC_NVM_MAX_OFFSET 0xFFFFFF
+ __le16 offset_low;
+ u8 offset_high;
+ u8 cmd_flags;
+#define ICE_AQC_NVM_LAST_CMD BIT(0)
+#define ICE_AQC_NVM_PCIR_REQ BIT(0) /* Used by NVM Update reply */
+#define ICE_AQC_NVM_PRESERVATION_S 1
+#define ICE_AQC_NVM_PRESERVATION_M (3 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_NO_PRESERVATION (0 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_PRESERVE_ALL BIT(1)
+#define ICE_AQC_NVM_FACTORY_DEFAULT (2 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_PRESERVE_SELECTED (3 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_ACTIV_SEL_NVM BIT(3) /* Write Activate/SR Dump only */
+#define ICE_AQC_NVM_ACTIV_SEL_OROM BIT(4)
+#define ICE_AQC_NVM_ACTIV_SEL_NETLIST BIT(5)
+#define ICE_AQC_NVM_SPECIAL_UPDATE BIT(6)
+#define ICE_AQC_NVM_REVERT_LAST_ACTIV BIT(6) /* Write Activate only */
+#define ICE_AQC_NVM_ACTIV_SEL_MASK ICE_M(0x7, 3)
+#define ICE_AQC_NVM_FLASH_ONLY BIT(7)
+#define ICE_AQC_NVM_RESET_LVL_M ICE_M(0x3, 0) /* Write reply only */
+#define ICE_AQC_NVM_POR_FLAG 0
+#define ICE_AQC_NVM_PERST_FLAG 1
+#define ICE_AQC_NVM_EMPR_FLAG 2
+#define ICE_AQC_NVM_EMPR_ENA BIT(0) /* Write Activate reply only */
+ /* For Write Activate, several flags are sent as part of a separate
+ * flags2 field using a separate byte. For simplicity of the software
+ * interface, we pass the flags as a 16 bit value so these flags are
+ * all offset by 8 bits
+ */
+#define ICE_AQC_NVM_ACTIV_REQ_EMPR BIT(8) /* NVM Write Activate only */
+ __le16 module_typeid;
+ __le16 length;
+#define ICE_AQC_NVM_ERASE_LEN 0xFFFF
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
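+/* Illustrative sketch (hypothetical helper, not part of the firmware
+ * interface): the NVM offset is 24 bits wide and split across the
+ * offset_low/offset_high fields above.
+ */
+static inline void ice_example_set_nvm_offset(struct ice_aqc_nvm *cmd,
+					      u32 offset)
+{
+	/* callers must keep offset <= ICE_AQC_NVM_MAX_OFFSET */
+	cmd->offset_low = cpu_to_le16(offset & 0xFFFF);
+	cmd->offset_high = (offset >> 16) & 0xFF;
+}
+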
+#define ICE_AQC_NVM_START_POINT 0
+
+/* NVM Checksum Command (direct, 0x0706) */
+struct ice_aqc_nvm_checksum {
+ u8 flags;
+#define ICE_AQC_NVM_CHECKSUM_VERIFY BIT(0)
+#define ICE_AQC_NVM_CHECKSUM_RECALC BIT(1)
+ u8 rsvd;
+ __le16 checksum; /* Used only by response */
+#define ICE_AQC_NVM_CHECKSUM_CORRECT 0xBABA
+ u8 rsvd2[12];
+};
+
+/* Used for NVM Set Package Data command - 0x070A */
+struct ice_aqc_nvm_pkg_data {
+ u8 reserved[3];
+ u8 cmd_flags;
+#define ICE_AQC_NVM_PKG_DELETE BIT(0) /* used for command call */
+#define ICE_AQC_NVM_PKG_SKIPPED BIT(0) /* used for command response */
+
+ u32 reserved1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Used for Pass Component Table command - 0x070B */
+struct ice_aqc_nvm_pass_comp_tbl {
+ u8 component_response; /* Response only */
+#define ICE_AQ_NVM_PASS_COMP_CAN_BE_UPDATED 0x0
+#define ICE_AQ_NVM_PASS_COMP_CAN_MAY_BE_UPDATEABLE 0x1
+#define ICE_AQ_NVM_PASS_COMP_CAN_NOT_BE_UPDATED 0x2
+ u8 component_response_code; /* Response only */
+#define ICE_AQ_NVM_PASS_COMP_CAN_BE_UPDATED_CODE 0x0
+#define ICE_AQ_NVM_PASS_COMP_STAMP_IDENTICAL_CODE 0x1
+#define ICE_AQ_NVM_PASS_COMP_STAMP_LOWER 0x2
+#define ICE_AQ_NVM_PASS_COMP_INVALID_STAMP_CODE 0x3
+#define ICE_AQ_NVM_PASS_COMP_CONFLICT_CODE 0x4
+#define ICE_AQ_NVM_PASS_COMP_PRE_REQ_NOT_MET_CODE 0x5
+#define ICE_AQ_NVM_PASS_COMP_NOT_SUPPORTED_CODE 0x6
+#define ICE_AQ_NVM_PASS_COMP_CANNOT_DOWNGRADE_CODE 0x7
+#define ICE_AQ_NVM_PASS_COMP_INCOMPLETE_IMAGE_CODE 0x8
+#define ICE_AQ_NVM_PASS_COMP_VER_STR_IDENTICAL_CODE 0xA
+#define ICE_AQ_NVM_PASS_COMP_VER_STR_LOWER_CODE 0xB
+ u8 reserved;
+ u8 transfer_flag;
+#define ICE_AQ_NVM_PASS_COMP_TBL_START 0x1
+#define ICE_AQ_NVM_PASS_COMP_TBL_MIDDLE 0x2
+#define ICE_AQ_NVM_PASS_COMP_TBL_END 0x4
+#define ICE_AQ_NVM_PASS_COMP_TBL_START_AND_END 0x5
+ __le32 reserved1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_nvm_comp_tbl {
+ __le16 comp_class;
+#define NVM_COMP_CLASS_ALL_FW 0x000A
+
+ __le16 comp_id;
+#define NVM_COMP_ID_OROM 0x5
+#define NVM_COMP_ID_NVM 0x6
+#define NVM_COMP_ID_NETLIST 0x8
+
+ u8 comp_class_idx;
+#define FWU_COMP_CLASS_IDX_NOT_USE 0x0
+
+ __le32 comp_cmp_stamp;
+ u8 cvs_type;
+#define NVM_CVS_TYPE_ASCII 0x1
+
+ u8 cvs_len;
+ u8 cvs[]; /* Component Version String */
+} __packed;
+
+/* Send to PF command (indirect 0x0801); ID is only used by PF
+ * Send to VF command (indirect 0x0802); ID is only used by PF
+ */
+struct ice_aqc_pf_vf_msg {
+ __le32 id;
+ u32 reserved;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Get LLDP MIB (indirect 0x0A00)
+ * Note: This is also used by the LLDP MIB Change Event (0x0A01)
+ * as the format is the same.
+ */
+struct ice_aqc_lldp_get_mib {
+ u8 type;
+#define ICE_AQ_LLDP_MIB_TYPE_S 0
+#define ICE_AQ_LLDP_MIB_TYPE_M (0x3 << ICE_AQ_LLDP_MIB_TYPE_S)
+#define ICE_AQ_LLDP_MIB_LOCAL 0
+#define ICE_AQ_LLDP_MIB_REMOTE 1
+#define ICE_AQ_LLDP_MIB_LOCAL_AND_REMOTE 2
+#define ICE_AQ_LLDP_BRID_TYPE_S 2
+#define ICE_AQ_LLDP_BRID_TYPE_M (0x3 << ICE_AQ_LLDP_BRID_TYPE_S)
+#define ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID 0
+#define ICE_AQ_LLDP_BRID_TYPE_NON_TPMR 1
+/* Tx pause flags in the 0xA01 event use ICE_AQ_LLDP_TX_* */
+#define ICE_AQ_LLDP_TX_S 0x4
+#define ICE_AQ_LLDP_TX_M (0x03 << ICE_AQ_LLDP_TX_S)
+#define ICE_AQ_LLDP_TX_ACTIVE 0
+#define ICE_AQ_LLDP_TX_SUSPENDED 1
+#define ICE_AQ_LLDP_TX_FLUSHED 3
+/* The following bytes are reserved in the Get LLDP MIB command (0x0A00)
+ * and in the LLDP MIB Change Event (0x0A01). They are valid for the
+ * Get LLDP MIB (0x0A00) response only.
+ */
+ u8 reserved1;
+ __le16 local_len;
+ __le16 remote_len;
+ u8 reserved2[2];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Configure LLDP MIB Change Event (direct 0x0A01) */
+/* For MIB Change Event use ice_aqc_lldp_get_mib structure above */
+struct ice_aqc_lldp_set_mib_change {
+ u8 command;
+#define ICE_AQ_LLDP_MIB_UPDATE_ENABLE 0x0
+#define ICE_AQ_LLDP_MIB_UPDATE_DIS 0x1
+ u8 reserved[15];
+};
+
+/* Stop LLDP (direct 0x0A05) */
+struct ice_aqc_lldp_stop {
+ u8 command;
+#define ICE_AQ_LLDP_AGENT_STATE_MASK BIT(0)
+#define ICE_AQ_LLDP_AGENT_STOP 0x0
+#define ICE_AQ_LLDP_AGENT_SHUTDOWN ICE_AQ_LLDP_AGENT_STATE_MASK
+#define ICE_AQ_LLDP_AGENT_PERSIST_DIS BIT(1)
+ u8 reserved[15];
+};
+
+/* Start LLDP (direct 0x0A06) */
+struct ice_aqc_lldp_start {
+ u8 command;
+#define ICE_AQ_LLDP_AGENT_START BIT(0)
+#define ICE_AQ_LLDP_AGENT_PERSIST_ENA BIT(1)
+ u8 reserved[15];
+};
+
+/* Get CEE DCBX Oper Config (0x0A07)
+ * The command uses the generic descriptor struct and
+ * returns the struct below as an indirect response.
+ */
+struct ice_aqc_get_cee_dcb_cfg_resp {
+ u8 oper_num_tc;
+ u8 oper_prio_tc[4];
+ u8 oper_tc_bw[8];
+ u8 oper_pfc_en;
+ __le16 oper_app_prio;
+#define ICE_AQC_CEE_APP_FCOE_S 0
+#define ICE_AQC_CEE_APP_FCOE_M (0x7 << ICE_AQC_CEE_APP_FCOE_S)
+#define ICE_AQC_CEE_APP_ISCSI_S 3
+#define ICE_AQC_CEE_APP_ISCSI_M (0x7 << ICE_AQC_CEE_APP_ISCSI_S)
+#define ICE_AQC_CEE_APP_FIP_S 8
+#define ICE_AQC_CEE_APP_FIP_M (0x7 << ICE_AQC_CEE_APP_FIP_S)
+ __le32 tlv_status;
+#define ICE_AQC_CEE_PG_STATUS_S 0
+#define ICE_AQC_CEE_PG_STATUS_M (0x7 << ICE_AQC_CEE_PG_STATUS_S)
+#define ICE_AQC_CEE_PFC_STATUS_S 3
+#define ICE_AQC_CEE_PFC_STATUS_M (0x7 << ICE_AQC_CEE_PFC_STATUS_S)
+#define ICE_AQC_CEE_FCOE_STATUS_S 8
+#define ICE_AQC_CEE_FCOE_STATUS_M (0x7 << ICE_AQC_CEE_FCOE_STATUS_S)
+#define ICE_AQC_CEE_ISCSI_STATUS_S 11
+#define ICE_AQC_CEE_ISCSI_STATUS_M (0x7 << ICE_AQC_CEE_ISCSI_STATUS_S)
+#define ICE_AQC_CEE_FIP_STATUS_S 16
+#define ICE_AQC_CEE_FIP_STATUS_M (0x7 << ICE_AQC_CEE_FIP_STATUS_S)
+ u8 reserved[12];
+};
+
+/* Set Local LLDP MIB (indirect 0x0A08)
+ * Used to replace the local MIB of a given LLDP agent, e.g. DCBX.
+ */
+struct ice_aqc_lldp_set_local_mib {
+ u8 type;
+#define SET_LOCAL_MIB_TYPE_DCBX_M BIT(0)
+#define SET_LOCAL_MIB_TYPE_LOCAL_MIB 0
+#define SET_LOCAL_MIB_TYPE_CEE_M BIT(1)
+#define SET_LOCAL_MIB_TYPE_CEE_WILLING 0
+#define SET_LOCAL_MIB_TYPE_CEE_NON_WILLING SET_LOCAL_MIB_TYPE_CEE_M
+ u8 reserved0;
+ __le16 length;
+ u8 reserved1[4];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Stop/Start LLDP Agent (direct 0x0A09)
+ * Used for stopping/starting a specific LLDP agent, e.g. DCBX.
+ * The same structure is used for the response, with the command field
+ * being used as the status field.
+ */
+struct ice_aqc_lldp_stop_start_specific_agent {
+ u8 command;
+#define ICE_AQC_START_STOP_AGENT_M BIT(0)
+#define ICE_AQC_START_STOP_AGENT_STOP_DCBX 0
+#define ICE_AQC_START_STOP_AGENT_START_DCBX ICE_AQC_START_STOP_AGENT_M
+ u8 reserved[15];
+};
+
+/* LLDP Filter Control (direct 0x0A0A) */
+struct ice_aqc_lldp_filter_ctrl {
+ u8 cmd_flags;
+#define ICE_AQC_LLDP_FILTER_ACTION_ADD 0x0
+#define ICE_AQC_LLDP_FILTER_ACTION_DELETE 0x1
+ u8 reserved1;
+ __le16 vsi_num;
+ u8 reserved2[12];
+};
+
+/* Get/Set RSS key (indirect 0x0B04/0x0B02) */
+struct ice_aqc_get_set_rss_key {
+#define ICE_AQC_GSET_RSS_KEY_VSI_VALID BIT(15)
+#define ICE_AQC_GSET_RSS_KEY_VSI_ID_S 0
+#define ICE_AQC_GSET_RSS_KEY_VSI_ID_M (0x3FF << ICE_AQC_GSET_RSS_KEY_VSI_ID_S)
+ __le16 vsi_id;
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+#define ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE 0x28
+#define ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE 0xC
+#define ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE \
+ (ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE + \
+ ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE)
+
+struct ice_aqc_get_set_rss_keys {
+ u8 standard_rss_key[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE];
+ u8 extended_hash_key[ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE];
+};
+
+/* Get/Set RSS LUT (indirect 0x0B05/0x0B03) */
+struct ice_aqc_get_set_rss_lut {
+#define ICE_AQC_GSET_RSS_LUT_VSI_VALID BIT(15)
+#define ICE_AQC_GSET_RSS_LUT_VSI_ID_S 0
+#define ICE_AQC_GSET_RSS_LUT_VSI_ID_M (0x3FF << ICE_AQC_GSET_RSS_LUT_VSI_ID_S)
+ __le16 vsi_id;
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S 0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M \
+ (0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S)
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI 0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF 1
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL 2
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S 2
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M \
+ (0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S)
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128 128
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG 0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512 512
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG 1
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K 2048
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG 2
+
+#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S 4
+#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M \
+ (0xF << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S)
+
+ __le16 flags;
+ __le32 reserved;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
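+/* Illustrative sketch (hypothetical helper, not part of the firmware
+ * interface): composing the flags word above for a 512-entry PF LUT using
+ * the shift/mask defines in this structure.
+ */
+static inline __le16 ice_example_rss_lut_flags(void)
+{
+	u16 flags;
+
+	flags = (ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF <<
+		 ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S) &
+		ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M;
+	flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG <<
+		  ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+		 ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+
+	return cpu_to_le16(flags);
+}
+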
+/* Sideband Control Interface Commands */
+/* Neighbor Device Request (indirect 0x0C00); also used for the response. */
+struct ice_aqc_neigh_dev_req {
+ __le16 sb_data_len;
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Add Tx LAN Queues (indirect 0x0C30) */
+struct ice_aqc_add_txqs {
+ u8 num_qgrps;
+ u8 reserved[3];
+ __le32 reserved1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* This is the descriptor of each queue entry for the Add Tx LAN Queues
+ * command (0x0C30). Only used within struct ice_aqc_add_tx_qgrp.
+ */
+struct ice_aqc_add_txqs_perq {
+ __le16 txq_id;
+ u8 rsvd[2];
+ __le32 q_teid;
+ u8 txq_ctx[22];
+ u8 rsvd2[2];
+ struct ice_aqc_txsched_elem info;
+};
+
+/* The format of the command buffer for Add Tx LAN Queues (0x0C30)
+ * is an array of the following structs. Please note that the length of
+ * each struct ice_aqc_add_tx_qgrp is variable due
+ * to the variable number of queues in each group!
+ */
+struct ice_aqc_add_tx_qgrp {
+ __le32 parent_teid;
+ u8 num_txqs;
+ u8 rsvd[3];
+ struct ice_aqc_add_txqs_perq txqs[];
+};
+
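+/* Illustrative sketch (hypothetical helper, not part of the firmware
+ * interface): because txqs[] is a flexible array, the buffer size for one
+ * queue group is computed with struct_size() (linux/overflow.h) rather
+ * than sizeof().
+ */
+static inline size_t
+ice_example_tx_qgrp_size(const struct ice_aqc_add_tx_qgrp *qg, u8 num_txqs)
+{
+	return struct_size(qg, txqs, num_txqs);
+}
+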
+/* Disable Tx LAN Queues (indirect 0x0C31) */
+struct ice_aqc_dis_txqs {
+ u8 cmd_type;
+#define ICE_AQC_Q_DIS_CMD_S 0
+#define ICE_AQC_Q_DIS_CMD_M (0x3 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_NO_FUNC_RESET (0 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_VM_RESET BIT(ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_VF_RESET (2 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_PF_RESET (3 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_SUBSEQ_CALL BIT(2)
+#define ICE_AQC_Q_DIS_CMD_FLUSH_PIPE BIT(3)
+ u8 num_entries;
+ __le16 vmvf_and_timeout;
+#define ICE_AQC_Q_DIS_VMVF_NUM_S 0
+#define ICE_AQC_Q_DIS_VMVF_NUM_M (0x3FF << ICE_AQC_Q_DIS_VMVF_NUM_S)
+#define ICE_AQC_Q_DIS_TIMEOUT_S 10
+#define ICE_AQC_Q_DIS_TIMEOUT_M (0x3F << ICE_AQC_Q_DIS_TIMEOUT_S)
+ __le32 blocked_cgds;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* The buffer for Disable Tx LAN Queues (indirect 0x0C31)
+ * contains the following structures, arrayed one after the
+ * other.
+ * Note: Since the q_id is 16 bits wide, if the
+ * number of queues is even, then 2 bytes of alignment MUST be
+ * added before the start of the next group, to allow correct
+ * alignment of the parent_teid field.
+ */
+struct ice_aqc_dis_txq_item {
+ __le32 parent_teid;
+ u8 num_qs;
+ u8 rsvd;
+ /* The length of the q_id array varies according to num_qs */
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S 15
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_LAN_Q \
+ (0 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S)
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET \
+ (1 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S)
+ __le16 q_id[];
+} __packed;
+
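+/* Illustrative sketch (hypothetical helper, not part of the firmware
+ * interface): sizing one group per the alignment note above. Rounding the
+ * variable-length q_id array up to a 4-byte multiple adds the 2 padding
+ * bytes whenever the number of queues is even, keeping the next group's
+ * parent_teid aligned.
+ */
+static inline size_t
+ice_example_dis_txq_item_size(const struct ice_aqc_dis_txq_item *item,
+			      u8 num_qs)
+{
+	return roundup(struct_size(item, q_id, num_qs), sizeof(__le32));
+}
+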
+/* Add Tx RDMA Queue Set (indirect 0x0C33) */
+struct ice_aqc_add_rdma_qset {
+ u8 num_qset_grps;
+ u8 reserved[7];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* This is the descriptor of each Qset entry for the Add Tx RDMA Queue Set
+ * command (0x0C33). Only used within struct ice_aqc_add_rdma_qset.
+ */
+struct ice_aqc_add_tx_rdma_qset_entry {
+ __le16 tx_qset_id;
+ u8 rsvd[2];
+ __le32 qset_teid;
+ struct ice_aqc_txsched_elem info;
+};
+
+/* The format of the command buffer for Add Tx RDMA Queue Set (0x0C33)
+ * is an array of the following structs. Please note that the length of
+ * each struct ice_aqc_add_rdma_qset_data is variable due to the variable
+ * number of queue sets in each group!
+ */
+struct ice_aqc_add_rdma_qset_data {
+ __le32 parent_teid;
+ __le16 num_qsets;
+ u8 rsvd[2];
+ struct ice_aqc_add_tx_rdma_qset_entry rdma_qsets[];
+};
+
+/* Configure Firmware Logging Command (indirect 0xFF09)
+ * Logging Information Read Response (indirect 0xFF10)
+ * Note: The 0xFF10 command has no input parameters.
+ */
+struct ice_aqc_fw_logging {
+ u8 log_ctrl;
+#define ICE_AQC_FW_LOG_AQ_EN BIT(0)
+#define ICE_AQC_FW_LOG_UART_EN BIT(1)
+ u8 rsvd0;
+ u8 log_ctrl_valid; /* Not used by 0xFF10 Response */
+#define ICE_AQC_FW_LOG_AQ_VALID BIT(0)
+#define ICE_AQC_FW_LOG_UART_VALID BIT(1)
+ u8 rsvd1[5];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+enum ice_aqc_fw_logging_mod {
+ ICE_AQC_FW_LOG_ID_GENERAL = 0,
+ ICE_AQC_FW_LOG_ID_CTRL,
+ ICE_AQC_FW_LOG_ID_LINK,
+ ICE_AQC_FW_LOG_ID_LINK_TOPO,
+ ICE_AQC_FW_LOG_ID_DNL,
+ ICE_AQC_FW_LOG_ID_I2C,
+ ICE_AQC_FW_LOG_ID_SDP,
+ ICE_AQC_FW_LOG_ID_MDIO,
+ ICE_AQC_FW_LOG_ID_ADMINQ,
+ ICE_AQC_FW_LOG_ID_HDMA,
+ ICE_AQC_FW_LOG_ID_LLDP,
+ ICE_AQC_FW_LOG_ID_DCBX,
+ ICE_AQC_FW_LOG_ID_DCB,
+ ICE_AQC_FW_LOG_ID_NETPROXY,
+ ICE_AQC_FW_LOG_ID_NVM,
+ ICE_AQC_FW_LOG_ID_AUTH,
+ ICE_AQC_FW_LOG_ID_VPD,
+ ICE_AQC_FW_LOG_ID_IOSF,
+ ICE_AQC_FW_LOG_ID_PARSER,
+ ICE_AQC_FW_LOG_ID_SW,
+ ICE_AQC_FW_LOG_ID_SCHEDULER,
+ ICE_AQC_FW_LOG_ID_TXQ,
+ ICE_AQC_FW_LOG_ID_RSVD,
+ ICE_AQC_FW_LOG_ID_POST,
+ ICE_AQC_FW_LOG_ID_WATCHDOG,
+ ICE_AQC_FW_LOG_ID_TASK_DISPATCH,
+ ICE_AQC_FW_LOG_ID_MNG,
+ ICE_AQC_FW_LOG_ID_MAX,
+};
+
+/* Defines for both above FW logging command/response buffers */
+#define ICE_AQC_FW_LOG_ID_S 0
+#define ICE_AQC_FW_LOG_ID_M (0xFFF << ICE_AQC_FW_LOG_ID_S)
+
+#define ICE_AQC_FW_LOG_CONF_SUCCESS 0 /* Used by response */
+#define ICE_AQC_FW_LOG_CONF_BAD_INDX BIT(12) /* Used by response */
+
+#define ICE_AQC_FW_LOG_EN_S 12
+#define ICE_AQC_FW_LOG_EN_M (0xF << ICE_AQC_FW_LOG_EN_S)
+#define ICE_AQC_FW_LOG_INFO_EN BIT(12) /* Used by command */
+#define ICE_AQC_FW_LOG_INIT_EN BIT(13) /* Used by command */
+#define ICE_AQC_FW_LOG_FLOW_EN BIT(14) /* Used by command */
+#define ICE_AQC_FW_LOG_ERR_EN BIT(15) /* Used by command */
+
+/* Get/Clear FW Log (indirect 0xFF11) */
+struct ice_aqc_get_clear_fw_log {
+ u8 flags;
+#define ICE_AQC_FW_LOG_CLEAR BIT(0)
+#define ICE_AQC_FW_LOG_MORE_DATA_AVAIL BIT(1)
+ u8 rsvd1[7];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Download Package (indirect 0x0C40) */
+/* Also used for Update Package (indirect 0x0C41 and 0x0C42) */
+struct ice_aqc_download_pkg {
+ u8 flags;
+#define ICE_AQC_DOWNLOAD_PKG_LAST_BUF 0x01
+ u8 reserved[3];
+ __le32 reserved1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_download_pkg_resp {
+ __le32 error_offset;
+ __le32 error_info;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Get Package Info List (indirect 0x0C43) */
+struct ice_aqc_get_pkg_info_list {
+ __le32 reserved1;
+ __le32 reserved2;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Version format for packages */
+struct ice_pkg_ver {
+ u8 major;
+ u8 minor;
+ u8 update;
+ u8 draft;
+};
+
+#define ICE_PKG_NAME_SIZE 32
+#define ICE_SEG_ID_SIZE 28
+#define ICE_SEG_NAME_SIZE 28
+
+struct ice_aqc_get_pkg_info {
+ struct ice_pkg_ver ver;
+ char name[ICE_SEG_NAME_SIZE];
+ __le32 track_id;
+ u8 is_in_nvm;
+ u8 is_active;
+ u8 is_active_at_boot;
+ u8 is_modified;
+};
+
+/* Get Package Info List response buffer format (0x0C43) */
+struct ice_aqc_get_pkg_info_resp {
+ __le32 count;
+ struct ice_aqc_get_pkg_info pkg_info[];
+};
+
+/* Driver Shared Parameters (direct, 0x0C90) */
+struct ice_aqc_driver_shared_params {
+ u8 set_or_get_op;
+#define ICE_AQC_DRIVER_PARAM_OP_MASK BIT(0)
+#define ICE_AQC_DRIVER_PARAM_SET 0
+#define ICE_AQC_DRIVER_PARAM_GET 1
+ u8 param_indx;
+#define ICE_AQC_DRIVER_PARAM_MAX_IDX 15
+ u8 rsvd[2];
+ __le32 param_val;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+enum ice_aqc_driver_params {
+ /* OS clock index for PTP timer Domain 0 */
+ ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0 = 0,
+ /* OS clock index for PTP timer Domain 1 */
+ ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1,
+
+ /* Add new parameters above */
+ ICE_AQC_DRIVER_PARAM_MAX = 16,
+};
+
+/* Lan Queue Overflow Event (direct, 0x1001) */
+struct ice_aqc_event_lan_overflow {
+ __le32 prtdcb_ruptq;
+ __le32 qtx_ctl;
+ u8 reserved[8];
+};
+
+/**
+ * struct ice_aq_desc - Admin Queue (AQ) descriptor
+ * @flags: ICE_AQ_FLAG_* flags
+ * @opcode: AQ command opcode
+ * @datalen: length in bytes of indirect/external data buffer
+ * @retval: return value from firmware
+ * @cookie_high: opaque data high-half
+ * @cookie_low: opaque data low-half
+ * @params: command-specific parameters
+ *
+ * Descriptor format for commands the driver posts on the Admin Transmit Queue
+ * (ATQ). The firmware writes back onto the command descriptor and returns
+ * the result of the command. Asynchronous events that are not an immediate
+ * result of the command are written to the Admin Receive Queue (ARQ) using
+ * the same descriptor format. Descriptors are in little-endian notation with
+ * 32-bit words.
+ */
+struct ice_aq_desc {
+ __le16 flags;
+ __le16 opcode;
+ __le16 datalen;
+ __le16 retval;
+ __le32 cookie_high;
+ __le32 cookie_low;
+ union {
+ u8 raw[16];
+ struct ice_aqc_generic generic;
+ struct ice_aqc_get_ver get_ver;
+ struct ice_aqc_driver_ver driver_ver;
+ struct ice_aqc_q_shutdown q_shutdown;
+ struct ice_aqc_req_res res_owner;
+ struct ice_aqc_manage_mac_read mac_read;
+ struct ice_aqc_manage_mac_write mac_write;
+ struct ice_aqc_clear_pxe clear_pxe;
+ struct ice_aqc_list_caps get_cap;
+ struct ice_aqc_get_phy_caps get_phy;
+ struct ice_aqc_set_phy_cfg set_phy;
+ struct ice_aqc_restart_an restart_an;
+ struct ice_aqc_gpio read_write_gpio;
+ struct ice_aqc_sff_eeprom read_write_sff_param;
+ struct ice_aqc_set_port_id_led set_port_id_led;
+ struct ice_aqc_get_port_options get_port_options;
+ struct ice_aqc_set_port_option set_port_option;
+ struct ice_aqc_get_sw_cfg get_sw_conf;
+ struct ice_aqc_set_port_params set_port_params;
+ struct ice_aqc_sw_rules sw_rules;
+ struct ice_aqc_add_get_recipe add_get_recipe;
+ struct ice_aqc_recipe_to_profile recipe_to_profile;
+ struct ice_aqc_get_topo get_topo;
+ struct ice_aqc_sched_elem_cmd sched_elem_cmd;
+ struct ice_aqc_query_txsched_res query_sched_res;
+ struct ice_aqc_query_port_ets port_ets;
+ struct ice_aqc_rl_profile rl_profile;
+ struct ice_aqc_nvm nvm;
+ struct ice_aqc_nvm_checksum nvm_checksum;
+ struct ice_aqc_nvm_pkg_data pkg_data;
+ struct ice_aqc_nvm_pass_comp_tbl pass_comp_tbl;
+ struct ice_aqc_pf_vf_msg virt;
+ struct ice_aqc_set_query_pfc_mode set_query_pfc_mode;
+ struct ice_aqc_lldp_get_mib lldp_get_mib;
+ struct ice_aqc_lldp_set_mib_change lldp_set_event;
+ struct ice_aqc_lldp_stop lldp_stop;
+ struct ice_aqc_lldp_start lldp_start;
+ struct ice_aqc_lldp_set_local_mib lldp_set_mib;
+ struct ice_aqc_lldp_stop_start_specific_agent lldp_agent_ctrl;
+ struct ice_aqc_lldp_filter_ctrl lldp_filter_ctrl;
+ struct ice_aqc_get_set_rss_lut get_set_rss_lut;
+ struct ice_aqc_get_set_rss_key get_set_rss_key;
+ struct ice_aqc_neigh_dev_req neigh_dev;
+ struct ice_aqc_add_txqs add_txqs;
+ struct ice_aqc_dis_txqs dis_txqs;
+ struct ice_aqc_add_rdma_qset add_rdma_qset;
+ struct ice_aqc_add_get_update_free_vsi vsi_cmd;
+ struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res;
+ struct ice_aqc_fw_logging fw_logging;
+ struct ice_aqc_get_clear_fw_log get_clear_fw_log;
+ struct ice_aqc_download_pkg download_pkg;
+ struct ice_aqc_driver_shared_params drv_shared_params;
+ struct ice_aqc_set_mac_lb set_mac_lb;
+ struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
+ struct ice_aqc_set_mac_cfg set_mac_cfg;
+ struct ice_aqc_set_event_mask set_event_mask;
+ struct ice_aqc_get_link_status get_link_status;
+ struct ice_aqc_event_lan_overflow lan_overflow;
+ struct ice_aqc_get_link_topo get_link_topo;
+ struct ice_aqc_i2c read_write_i2c;
+ struct ice_aqc_read_i2c_resp read_i2c_resp;
+ } params;
+};
+
+/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */
+#define ICE_AQ_LG_BUF 512
+
+#define ICE_AQ_FLAG_ERR_S 2
+#define ICE_AQ_FLAG_LB_S 9
+#define ICE_AQ_FLAG_RD_S 10
+#define ICE_AQ_FLAG_BUF_S 12
+#define ICE_AQ_FLAG_SI_S 13
+
+#define ICE_AQ_FLAG_ERR BIT(ICE_AQ_FLAG_ERR_S) /* 0x4 */
+#define ICE_AQ_FLAG_LB BIT(ICE_AQ_FLAG_LB_S) /* 0x200 */
+#define ICE_AQ_FLAG_RD BIT(ICE_AQ_FLAG_RD_S) /* 0x400 */
+#define ICE_AQ_FLAG_BUF BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */
+#define ICE_AQ_FLAG_SI BIT(ICE_AQ_FLAG_SI_S) /* 0x2000 */
+
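+/* Illustrative sketch (hypothetical helper, not part of the firmware
+ * interface): indirect commands carry a buffer, so ICE_AQ_FLAG_BUF is set;
+ * ICE_AQ_FLAG_RD is added when the buffer holds data for the firmware to
+ * read rather than space for it to fill.
+ */
+static inline void ice_example_mark_indirect(struct ice_aq_desc *desc,
+					     bool host_to_fw)
+{
+	desc->flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
+	if (host_to_fw)
+		desc->flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+}
+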
+/* error codes */
+enum ice_aq_err {
+ ICE_AQ_RC_OK = 0, /* Success */
+ ICE_AQ_RC_EPERM = 1, /* Operation not permitted */
+ ICE_AQ_RC_ENOENT = 2, /* No such element */
+ ICE_AQ_RC_ENOMEM = 9, /* Out of memory */
+ ICE_AQ_RC_EBUSY = 12, /* Device or resource busy */
+ ICE_AQ_RC_EEXIST = 13, /* Object already exists */
+ ICE_AQ_RC_EINVAL = 14, /* Invalid argument */
+ ICE_AQ_RC_ENOSPC = 16, /* No space left or allocation failure */
+ ICE_AQ_RC_ENOSYS = 17, /* Function not implemented */
+ ICE_AQ_RC_EMODE = 21, /* Op not allowed in current dev mode */
+ ICE_AQ_RC_ENOSEC = 24, /* Missing security manifest */
+ ICE_AQ_RC_EBADSIG = 25, /* Bad RSA signature */
+ ICE_AQ_RC_ESVN = 26, /* SVN number prohibits this package */
+ ICE_AQ_RC_EBADMAN = 27, /* Manifest hash mismatch */
+ ICE_AQ_RC_EBADBUF = 28, /* Buffer hash mismatches manifest */
+};
+
+/* Admin Queue command opcodes */
+enum ice_adminq_opc {
+ /* AQ commands */
+ ice_aqc_opc_get_ver = 0x0001,
+ ice_aqc_opc_driver_ver = 0x0002,
+ ice_aqc_opc_q_shutdown = 0x0003,
+
+ /* resource ownership */
+ ice_aqc_opc_req_res = 0x0008,
+ ice_aqc_opc_release_res = 0x0009,
+
+ /* device/function capabilities */
+ ice_aqc_opc_list_func_caps = 0x000A,
+ ice_aqc_opc_list_dev_caps = 0x000B,
+
+ /* manage MAC address */
+ ice_aqc_opc_manage_mac_read = 0x0107,
+ ice_aqc_opc_manage_mac_write = 0x0108,
+
+ /* PXE */
+ ice_aqc_opc_clear_pxe_mode = 0x0110,
+
+ /* internal switch commands */
+ ice_aqc_opc_get_sw_cfg = 0x0200,
+ ice_aqc_opc_set_port_params = 0x0203,
+
+ /* Alloc/Free/Get Resources */
+ ice_aqc_opc_alloc_res = 0x0208,
+ ice_aqc_opc_free_res = 0x0209,
+ ice_aqc_opc_set_vlan_mode_parameters = 0x020C,
+ ice_aqc_opc_get_vlan_mode_parameters = 0x020D,
+
+ /* VSI commands */
+ ice_aqc_opc_add_vsi = 0x0210,
+ ice_aqc_opc_update_vsi = 0x0211,
+ ice_aqc_opc_free_vsi = 0x0213,
+
+ /* recipe commands */
+ ice_aqc_opc_add_recipe = 0x0290,
+ ice_aqc_opc_recipe_to_profile = 0x0291,
+ ice_aqc_opc_get_recipe = 0x0292,
+ ice_aqc_opc_get_recipe_to_profile = 0x0293,
+
+ /* switch rules population commands */
+ ice_aqc_opc_add_sw_rules = 0x02A0,
+ ice_aqc_opc_update_sw_rules = 0x02A1,
+ ice_aqc_opc_remove_sw_rules = 0x02A2,
+
+ ice_aqc_opc_clear_pf_cfg = 0x02A4,
+
+ /* DCB commands */
+ ice_aqc_opc_query_pfc_mode = 0x0302,
+ ice_aqc_opc_set_pfc_mode = 0x0303,
+
+ /* transmit scheduler commands */
+ ice_aqc_opc_get_dflt_topo = 0x0400,
+ ice_aqc_opc_add_sched_elems = 0x0401,
+ ice_aqc_opc_cfg_sched_elems = 0x0403,
+ ice_aqc_opc_get_sched_elems = 0x0404,
+ ice_aqc_opc_move_sched_elems = 0x0408,
+ ice_aqc_opc_suspend_sched_elems = 0x0409,
+ ice_aqc_opc_resume_sched_elems = 0x040A,
+ ice_aqc_opc_query_port_ets = 0x040E,
+ ice_aqc_opc_delete_sched_elems = 0x040F,
+ ice_aqc_opc_add_rl_profiles = 0x0410,
+ ice_aqc_opc_query_sched_res = 0x0412,
+ ice_aqc_opc_remove_rl_profiles = 0x0415,
+
+ /* PHY commands */
+ ice_aqc_opc_get_phy_caps = 0x0600,
+ ice_aqc_opc_set_phy_cfg = 0x0601,
+ ice_aqc_opc_set_mac_cfg = 0x0603,
+ ice_aqc_opc_restart_an = 0x0605,
+ ice_aqc_opc_get_link_status = 0x0607,
+ ice_aqc_opc_set_event_mask = 0x0613,
+ ice_aqc_opc_set_mac_lb = 0x0620,
+ ice_aqc_opc_get_link_topo = 0x06E0,
+ ice_aqc_opc_read_i2c = 0x06E2,
+ ice_aqc_opc_write_i2c = 0x06E3,
+ ice_aqc_opc_set_port_id_led = 0x06E9,
+ ice_aqc_opc_get_port_options = 0x06EA,
+ ice_aqc_opc_set_port_option = 0x06EB,
+ ice_aqc_opc_set_gpio = 0x06EC,
+ ice_aqc_opc_get_gpio = 0x06ED,
+ ice_aqc_opc_sff_eeprom = 0x06EE,
+
+ /* NVM commands */
+ ice_aqc_opc_nvm_read = 0x0701,
+ ice_aqc_opc_nvm_erase = 0x0702,
+ ice_aqc_opc_nvm_write = 0x0703,
+ ice_aqc_opc_nvm_checksum = 0x0706,
+ ice_aqc_opc_nvm_write_activate = 0x0707,
+ ice_aqc_opc_nvm_update_empr = 0x0709,
+ ice_aqc_opc_nvm_pkg_data = 0x070A,
+ ice_aqc_opc_nvm_pass_component_tbl = 0x070B,
+
+ /* PF/VF mailbox commands */
+ ice_mbx_opc_send_msg_to_pf = 0x0801,
+ ice_mbx_opc_send_msg_to_vf = 0x0802,
+ /* LLDP commands */
+ ice_aqc_opc_lldp_get_mib = 0x0A00,
+ ice_aqc_opc_lldp_set_mib_change = 0x0A01,
+ ice_aqc_opc_lldp_stop = 0x0A05,
+ ice_aqc_opc_lldp_start = 0x0A06,
+ ice_aqc_opc_get_cee_dcb_cfg = 0x0A07,
+ ice_aqc_opc_lldp_set_local_mib = 0x0A08,
+ ice_aqc_opc_lldp_stop_start_specific_agent = 0x0A09,
+ ice_aqc_opc_lldp_filter_ctrl = 0x0A0A,
+
+ /* RSS commands */
+ ice_aqc_opc_set_rss_key = 0x0B02,
+ ice_aqc_opc_set_rss_lut = 0x0B03,
+ ice_aqc_opc_get_rss_key = 0x0B04,
+ ice_aqc_opc_get_rss_lut = 0x0B05,
+
+ /* Sideband Control Interface commands */
+ ice_aqc_opc_neighbour_device_request = 0x0C00,
+
+ /* Tx queue handling commands/events */
+ ice_aqc_opc_add_txqs = 0x0C30,
+ ice_aqc_opc_dis_txqs = 0x0C31,
+ ice_aqc_opc_add_rdma_qset = 0x0C33,
+
+ /* package commands */
+ ice_aqc_opc_download_pkg = 0x0C40,
+ ice_aqc_opc_upload_section = 0x0C41,
+ ice_aqc_opc_update_pkg = 0x0C42,
+ ice_aqc_opc_get_pkg_info_list = 0x0C43,
+
+ ice_aqc_opc_driver_shared_params = 0x0C90,
+
+ /* Standalone Commands/Events */
+ ice_aqc_opc_event_lan_overflow = 0x1001,
+
+ /* debug commands */
+ ice_aqc_opc_fw_logging = 0xFF09,
+ ice_aqc_opc_fw_logging_info = 0xFF10,
+};
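+
+/* Illustrative sketch: an opcode from the enum above is stamped into a
+ * descriptor and posted on the admin send queue; roughly (see ice_common.c
+ * for the real helpers):
+ *
+ *	struct ice_aq_desc desc;
+ *
+ *	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_ver);
+ *	err = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ */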
+
+#endif /* _ICE_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
new file mode 100644
index 000000000..fba178e07
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice.h"
+
+/**
+ * ice_is_arfs_active - helper to check if aRFS is active
+ * @vsi: VSI to check
+ */
+static bool ice_is_arfs_active(struct ice_vsi *vsi)
+{
+ return !!vsi->arfs_fltr_list;
+}
+
+/**
+ * ice_is_arfs_using_perfect_flow - check if aRFS has active perfect filters
+ * @hw: pointer to the HW structure
+ * @flow_type: flow type as Flow Director understands it
+ *
+ * Flow Director will query this function to see if aRFS is currently using
+ * the specified flow_type for perfect (4-tuple) filters.
+ */
+bool
+ice_is_arfs_using_perfect_flow(struct ice_hw *hw, enum ice_fltr_ptype flow_type)
+{
+ struct ice_arfs_active_fltr_cntrs *arfs_fltr_cntrs;
+ struct ice_pf *pf = hw->back;
+ struct ice_vsi *vsi;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return false;
+
+ arfs_fltr_cntrs = vsi->arfs_fltr_cntrs;
+
+ /* active counters can be updated by multiple CPUs */
+ smp_mb__before_atomic();
+ switch (flow_type) {
+ case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+ return atomic_read(&arfs_fltr_cntrs->active_udpv4_cnt) > 0;
+ case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+ return atomic_read(&arfs_fltr_cntrs->active_udpv6_cnt) > 0;
+ case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+ return atomic_read(&arfs_fltr_cntrs->active_tcpv4_cnt) > 0;
+ case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+ return atomic_read(&arfs_fltr_cntrs->active_tcpv6_cnt) > 0;
+ default:
+ return false;
+ }
+}
+
+/**
+ * ice_arfs_update_active_fltr_cntrs - update active filter counters for aRFS
+ * @vsi: VSI that aRFS is active on
+ * @entry: aRFS entry used to change counters
+ * @add: true to increment counter, false to decrement
+ */
+static void
+ice_arfs_update_active_fltr_cntrs(struct ice_vsi *vsi,
+ struct ice_arfs_entry *entry, bool add)
+{
+ struct ice_arfs_active_fltr_cntrs *fltr_cntrs = vsi->arfs_fltr_cntrs;
+
+ switch (entry->fltr_info.flow_type) {
+ case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+ if (add)
+ atomic_inc(&fltr_cntrs->active_tcpv4_cnt);
+ else
+ atomic_dec(&fltr_cntrs->active_tcpv4_cnt);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+ if (add)
+ atomic_inc(&fltr_cntrs->active_tcpv6_cnt);
+ else
+ atomic_dec(&fltr_cntrs->active_tcpv6_cnt);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+ if (add)
+ atomic_inc(&fltr_cntrs->active_udpv4_cnt);
+ else
+ atomic_dec(&fltr_cntrs->active_udpv4_cnt);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+ if (add)
+ atomic_inc(&fltr_cntrs->active_udpv6_cnt);
+ else
+ atomic_dec(&fltr_cntrs->active_udpv6_cnt);
+ break;
+ default:
+ dev_err(ice_pf_to_dev(vsi->back), "aRFS: Failed to update filter counters, invalid filter type %d\n",
+ entry->fltr_info.flow_type);
+ }
+}
+
+/**
+ * ice_arfs_del_flow_rules - delete the rules passed in from HW
+ * @vsi: VSI for the flow rules that need to be deleted
+ * @del_list_head: head of the list of ice_arfs_entry(s) for rule deletion
+ *
+ * Loop through the delete list passed in and remove the rules from HW. After
+ * each rule is deleted, disconnect and free the ice_arfs_entry because it is no
+ * longer being referenced by the aRFS hash table.
+ */
+static void
+ice_arfs_del_flow_rules(struct ice_vsi *vsi, struct hlist_head *del_list_head)
+{
+ struct ice_arfs_entry *e;
+ struct hlist_node *n;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(vsi->back);
+
+ hlist_for_each_entry_safe(e, n, del_list_head, list_entry) {
+ int result;
+
+ result = ice_fdir_write_fltr(vsi->back, &e->fltr_info, false,
+ false);
+ if (!result)
+ ice_arfs_update_active_fltr_cntrs(vsi, e, false);
+ else
+ dev_dbg(dev, "Unable to delete aRFS entry, err %d fltr_state %d fltr_id %d flow_id %d Q %d\n",
+ result, e->fltr_state, e->fltr_info.fltr_id,
+ e->flow_id, e->fltr_info.q_index);
+
+ /* The aRFS hash table is no longer referencing this entry */
+ hlist_del(&e->list_entry);
+ devm_kfree(dev, e);
+ }
+}
+
+/**
+ * ice_arfs_add_flow_rules - add the rules passed in to HW
+ * @vsi: VSI for the flow rules that need to be added
+ * @add_list_head: head of the list of ice_arfs_entry_ptr(s) for rule addition
+ *
+ * Loop through the add list passed in and add the rules to HW. After each
+ * rule is added, disconnect and free the ice_arfs_entry_ptr node. Don't free
+ * the ice_arfs_entry(s) because they are still being referenced in the aRFS
+ * hash table.
+ */
+static void
+ice_arfs_add_flow_rules(struct ice_vsi *vsi, struct hlist_head *add_list_head)
+{
+ struct ice_arfs_entry_ptr *ep;
+ struct hlist_node *n;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(vsi->back);
+
+ hlist_for_each_entry_safe(ep, n, add_list_head, list_entry) {
+ int result;
+
+ result = ice_fdir_write_fltr(vsi->back,
+ &ep->arfs_entry->fltr_info, true,
+ false);
+ if (!result)
+ ice_arfs_update_active_fltr_cntrs(vsi, ep->arfs_entry,
+ true);
+ else
+ dev_dbg(dev, "Unable to add aRFS entry, err %d fltr_state %d fltr_id %d flow_id %d Q %d\n",
+ result, ep->arfs_entry->fltr_state,
+ ep->arfs_entry->fltr_info.fltr_id,
+ ep->arfs_entry->flow_id,
+ ep->arfs_entry->fltr_info.q_index);
+
+ hlist_del(&ep->list_entry);
+ devm_kfree(dev, ep);
+ }
+}
+
+/**
+ * ice_arfs_is_flow_expired - check if the aRFS entry has expired
+ * @vsi: VSI containing the aRFS entry
+ * @arfs_entry: aRFS entry that's being checked for expiration
+ *
+ * Return true if the flow has expired, else false. This function should be used
+ * to determine whether or not an aRFS entry should be removed from the hardware
+ * and software structures.
+ */
+static bool
+ice_arfs_is_flow_expired(struct ice_vsi *vsi, struct ice_arfs_entry *arfs_entry)
+{
+#define ICE_ARFS_TIME_DELTA_EXPIRATION msecs_to_jiffies(5000)
+ if (rps_may_expire_flow(vsi->netdev, arfs_entry->fltr_info.q_index,
+ arfs_entry->flow_id,
+ arfs_entry->fltr_info.fltr_id))
+ return true;
+
+ /* expiration timer only used for UDP filters */
+ if (arfs_entry->fltr_info.flow_type != ICE_FLTR_PTYPE_NONF_IPV4_UDP &&
+ arfs_entry->fltr_info.flow_type != ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+ return false;
+
+ return time_in_range64(arfs_entry->time_activated +
+ ICE_ARFS_TIME_DELTA_EXPIRATION,
+ arfs_entry->time_activated, get_jiffies_64());
+}
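+
+/* Illustrative note: with ICE_ARFS_TIME_DELTA_EXPIRATION at 5000 ms, a UDP
+ * entry activated at jiffies64 time T is treated as expired once
+ * get_jiffies_64() passes T + msecs_to_jiffies(5000); TCP entries rely
+ * solely on rps_may_expire_flow() above.
+ */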
+
+/**
+ * ice_arfs_update_flow_rules - add/delete aRFS rules in HW
+ * @vsi: the VSI to be forwarded to
+ * @idx: index into the table of aRFS filter lists. Obtained from skb->hash
+ * @add_list: list to populate with filters to be added to Flow Director
+ * @del_list: list to populate with filters to be deleted from Flow Director
+ *
+ * Iterate over the hlist at the index given in the aRFS hash table and
+ * determine if there are any aRFS entries that need to be either added or
+ * deleted in the HW. If the aRFS entry is marked as ICE_ARFS_INACTIVE the
+ * filter needs to be added to HW, else if it's marked as ICE_ARFS_ACTIVE and
+ * the flow has expired delete the filter from HW. The caller of this function
+ * is expected to add/delete rules on the add_list/del_list respectively.
+ */
+static void
+ice_arfs_update_flow_rules(struct ice_vsi *vsi, u16 idx,
+ struct hlist_head *add_list,
+ struct hlist_head *del_list)
+{
+ struct ice_arfs_entry *e;
+ struct hlist_node *n;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(vsi->back);
+
+ /* go through the aRFS hlist at this idx and check for needed updates */
+ hlist_for_each_entry_safe(e, n, &vsi->arfs_fltr_list[idx], list_entry)
+ /* check if filter needs to be added to HW */
+ if (e->fltr_state == ICE_ARFS_INACTIVE) {
+ enum ice_fltr_ptype flow_type = e->fltr_info.flow_type;
+ struct ice_arfs_entry_ptr *ep =
+ devm_kzalloc(dev, sizeof(*ep), GFP_ATOMIC);
+
+ if (!ep)
+ continue;
+ INIT_HLIST_NODE(&ep->list_entry);
+ /* reference aRFS entry to add HW filter */
+ ep->arfs_entry = e;
+ hlist_add_head(&ep->list_entry, add_list);
+ e->fltr_state = ICE_ARFS_ACTIVE;
+ /* expiration timer only used for UDP flows */
+ if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+ e->time_activated = get_jiffies_64();
+ } else if (e->fltr_state == ICE_ARFS_ACTIVE) {
+ /* check if filter needs to be removed from HW */
+ if (ice_arfs_is_flow_expired(vsi, e)) {
+ /* remove aRFS entry from hash table for delete
+ * and to prevent referencing it the next time
+ * through this hlist index
+ */
+ hlist_del(&e->list_entry);
+ e->fltr_state = ICE_ARFS_TODEL;
+ /* save reference to aRFS entry for delete */
+ hlist_add_head(&e->list_entry, del_list);
+ }
+ }
+}
+
+/**
+ * ice_sync_arfs_fltrs - update all aRFS filters
+ * @pf: board private structure
+ */
+void ice_sync_arfs_fltrs(struct ice_pf *pf)
+{
+ HLIST_HEAD(tmp_del_list);
+ HLIST_HEAD(tmp_add_list);
+ struct ice_vsi *pf_vsi;
+ unsigned int i;
+
+ pf_vsi = ice_get_main_vsi(pf);
+ if (!pf_vsi)
+ return;
+
+ if (!ice_is_arfs_active(pf_vsi))
+ return;
+
+ spin_lock_bh(&pf_vsi->arfs_lock);
+ /* Once we process aRFS for the PF VSI get out */
+ for (i = 0; i < ICE_MAX_ARFS_LIST; i++)
+ ice_arfs_update_flow_rules(pf_vsi, i, &tmp_add_list,
+ &tmp_del_list);
+ spin_unlock_bh(&pf_vsi->arfs_lock);
+
+ /* use list of ice_arfs_entry(s) for delete */
+ ice_arfs_del_flow_rules(pf_vsi, &tmp_del_list);
+
+ /* use list of ice_arfs_entry_ptr(s) for add */
+ ice_arfs_add_flow_rules(pf_vsi, &tmp_add_list);
+}
+
+/**
+ * ice_arfs_build_entry - builds an aRFS entry based on input
+ * @vsi: destination VSI for this flow
+ * @fk: flow dissector keys for creating the tuple
+ * @rxq_idx: Rx queue to steer this flow to
+ * @flow_id: passed down from the stack and saved for flow expiration
+ *
+ * returns an aRFS entry on success and NULL on failure
+ */
+static struct ice_arfs_entry *
+ice_arfs_build_entry(struct ice_vsi *vsi, const struct flow_keys *fk,
+ u16 rxq_idx, u32 flow_id)
+{
+ struct ice_arfs_entry *arfs_entry;
+ struct ice_fdir_fltr *fltr_info;
+ u8 ip_proto;
+
+ arfs_entry = devm_kzalloc(ice_pf_to_dev(vsi->back),
+ sizeof(*arfs_entry),
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (!arfs_entry)
+ return NULL;
+
+ fltr_info = &arfs_entry->fltr_info;
+ fltr_info->q_index = rxq_idx;
+ fltr_info->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+ fltr_info->dest_vsi = vsi->idx;
+ ip_proto = fk->basic.ip_proto;
+
+ if (fk->basic.n_proto == htons(ETH_P_IP)) {
+ fltr_info->ip.v4.proto = ip_proto;
+ fltr_info->flow_type = (ip_proto == IPPROTO_TCP) ?
+ ICE_FLTR_PTYPE_NONF_IPV4_TCP :
+ ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+ fltr_info->ip.v4.src_ip = fk->addrs.v4addrs.src;
+ fltr_info->ip.v4.dst_ip = fk->addrs.v4addrs.dst;
+ fltr_info->ip.v4.src_port = fk->ports.src;
+ fltr_info->ip.v4.dst_port = fk->ports.dst;
+ } else { /* ETH_P_IPV6 */
+ fltr_info->ip.v6.proto = ip_proto;
+ fltr_info->flow_type = (ip_proto == IPPROTO_TCP) ?
+ ICE_FLTR_PTYPE_NONF_IPV6_TCP :
+ ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+ memcpy(&fltr_info->ip.v6.src_ip, &fk->addrs.v6addrs.src,
+ sizeof(struct in6_addr));
+ memcpy(&fltr_info->ip.v6.dst_ip, &fk->addrs.v6addrs.dst,
+ sizeof(struct in6_addr));
+ fltr_info->ip.v6.src_port = fk->ports.src;
+ fltr_info->ip.v6.dst_port = fk->ports.dst;
+ }
+
+ arfs_entry->flow_id = flow_id;
+ fltr_info->fltr_id =
+ atomic_inc_return(vsi->arfs_last_fltr_id) % RPS_NO_FILTER;
+
+ return arfs_entry;
+}
+
+/**
+ * ice_arfs_is_perfect_flow_set - Check to see if perfect flow is set
+ * @hw: pointer to HW structure
+ * @l3_proto: ETH_P_IP or ETH_P_IPV6 in network order
+ * @l4_proto: IPPROTO_UDP or IPPROTO_TCP
+ *
+ * We only support perfect (4-tuple) filters for aRFS. This function allows aRFS
+ * to check if perfect (4-tuple) flow rules are currently in place by Flow
+ * Director.
+ */
+static bool
+ice_arfs_is_perfect_flow_set(struct ice_hw *hw, __be16 l3_proto, u8 l4_proto)
+{
+ unsigned long *perfect_fltr = hw->fdir_perfect_fltr;
+
+ /* advanced Flow Director disabled, perfect filters always supported */
+ if (!perfect_fltr)
+ return true;
+
+ if (l3_proto == htons(ETH_P_IP) && l4_proto == IPPROTO_UDP)
+ return test_bit(ICE_FLTR_PTYPE_NONF_IPV4_UDP, perfect_fltr);
+ else if (l3_proto == htons(ETH_P_IP) && l4_proto == IPPROTO_TCP)
+ return test_bit(ICE_FLTR_PTYPE_NONF_IPV4_TCP, perfect_fltr);
+ else if (l3_proto == htons(ETH_P_IPV6) && l4_proto == IPPROTO_UDP)
+ return test_bit(ICE_FLTR_PTYPE_NONF_IPV6_UDP, perfect_fltr);
+ else if (l3_proto == htons(ETH_P_IPV6) && l4_proto == IPPROTO_TCP)
+ return test_bit(ICE_FLTR_PTYPE_NONF_IPV6_TCP, perfect_fltr);
+
+ return false;
+}
+
+/**
+ * ice_rx_flow_steer - steer the Rx flow to where the application is running
+ * @netdev: ptr to the netdev being adjusted
+ * @skb: buffer with required header information
+ * @rxq_idx: queue to which the flow needs to move
+ * @flow_id: flow identifier provided by the netdev
+ *
+ * Based on the skb, rxq_idx, and flow_id passed in add/update an entry in the
+ * aRFS hash table. Iterate over one of the hlists in the aRFS hash table and
+ * if the flow_id already exists in the hash table but the rxq_idx has changed
+ * mark the entry as ICE_ARFS_INACTIVE so it can get updated in HW, else
+ * if the entry is marked as ICE_ARFS_TODEL delete it from the aRFS hash table.
+ * If neither of the previous conditions are true then add a new entry in the
+ * aRFS hash table, which gets set to ICE_ARFS_INACTIVE by default so it can be
+ * added to HW.
+ */
+int
+ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb,
+ u16 rxq_idx, u32 flow_id)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_arfs_entry *arfs_entry;
+ struct ice_vsi *vsi = np->vsi;
+ struct flow_keys fk;
+ struct ice_pf *pf;
+ __be16 n_proto;
+ u8 ip_proto;
+ u16 idx;
+ int ret;
+
+ /* failed to allocate memory for aRFS so don't crash */
+ if (unlikely(!vsi->arfs_fltr_list))
+ return -ENODEV;
+
+ pf = vsi->back;
+
+ if (skb->encapsulation)
+ return -EPROTONOSUPPORT;
+
+ if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+ return -EPROTONOSUPPORT;
+
+ n_proto = fk.basic.n_proto;
+ /* Support only IPV4 and IPV6 */
+ if ((n_proto == htons(ETH_P_IP) && !ip_is_fragment(ip_hdr(skb))) ||
+ n_proto == htons(ETH_P_IPV6))
+ ip_proto = fk.basic.ip_proto;
+ else
+ return -EPROTONOSUPPORT;
+
+ /* Support only TCP and UDP */
+ if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP)
+ return -EPROTONOSUPPORT;
+
+ /* only support 4-tuple filters for aRFS */
+ if (!ice_arfs_is_perfect_flow_set(&pf->hw, n_proto, ip_proto))
+ return -EOPNOTSUPP;
+
+ /* choose the aRFS list bucket based on skb hash */
+ idx = skb_get_hash_raw(skb) & ICE_ARFS_LST_MASK;
+ /* search for entry in the bucket */
+ spin_lock_bh(&vsi->arfs_lock);
+ hlist_for_each_entry(arfs_entry, &vsi->arfs_fltr_list[idx],
+ list_entry) {
+ struct ice_fdir_fltr *fltr_info;
+
+ /* keep searching for the already existing arfs_entry flow */
+ if (arfs_entry->flow_id != flow_id)
+ continue;
+
+ fltr_info = &arfs_entry->fltr_info;
+ ret = fltr_info->fltr_id;
+
+ if (fltr_info->q_index == rxq_idx ||
+ arfs_entry->fltr_state != ICE_ARFS_ACTIVE)
+ goto out;
+
+ /* update the queue to forward to on an already existing flow */
+ fltr_info->q_index = rxq_idx;
+ arfs_entry->fltr_state = ICE_ARFS_INACTIVE;
+ ice_arfs_update_active_fltr_cntrs(vsi, arfs_entry, false);
+ goto out_schedule_service_task;
+ }
+
+ arfs_entry = ice_arfs_build_entry(vsi, &fk, rxq_idx, flow_id);
+ if (!arfs_entry) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = arfs_entry->fltr_info.fltr_id;
+ INIT_HLIST_NODE(&arfs_entry->list_entry);
+ hlist_add_head(&arfs_entry->list_entry, &vsi->arfs_fltr_list[idx]);
+out_schedule_service_task:
+ ice_service_task_schedule(pf);
+out:
+ spin_unlock_bh(&vsi->arfs_lock);
+ return ret;
+}
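+
+/* Illustrative sketch: when CONFIG_RFS_ACCEL is set, this handler is
+ * exposed to the stack through the netdev ops (see ice_main.c), roughly:
+ *
+ *	static const struct net_device_ops ice_netdev_ops = {
+ *		...
+ *	#ifdef CONFIG_RFS_ACCEL
+ *		.ndo_rx_flow_steer = ice_rx_flow_steer,
+ *	#endif
+ *		...
+ *	};
+ */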
+
+/**
+ * ice_init_arfs_cntrs - initialize aRFS counter values
+ * @vsi: VSI that aRFS counters need to be initialized on
+ */
+static int ice_init_arfs_cntrs(struct ice_vsi *vsi)
+{
+ if (!vsi || vsi->type != ICE_VSI_PF)
+ return -EINVAL;
+
+ vsi->arfs_fltr_cntrs = kzalloc(sizeof(*vsi->arfs_fltr_cntrs),
+ GFP_KERNEL);
+ if (!vsi->arfs_fltr_cntrs)
+ return -ENOMEM;
+
+ vsi->arfs_last_fltr_id = kzalloc(sizeof(*vsi->arfs_last_fltr_id),
+ GFP_KERNEL);
+ if (!vsi->arfs_last_fltr_id) {
+ kfree(vsi->arfs_fltr_cntrs);
+ vsi->arfs_fltr_cntrs = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_init_arfs - initialize aRFS resources
+ * @vsi: the VSI to be forwarded to
+ */
+void ice_init_arfs(struct ice_vsi *vsi)
+{
+ struct hlist_head *arfs_fltr_list;
+ unsigned int i;
+
+ if (!vsi || vsi->type != ICE_VSI_PF)
+ return;
+
+ arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list),
+ GFP_KERNEL);
+ if (!arfs_fltr_list)
+ return;
+
+ if (ice_init_arfs_cntrs(vsi))
+ goto free_arfs_fltr_list;
+
+ for (i = 0; i < ICE_MAX_ARFS_LIST; i++)
+ INIT_HLIST_HEAD(&arfs_fltr_list[i]);
+
+ spin_lock_init(&vsi->arfs_lock);
+
+ vsi->arfs_fltr_list = arfs_fltr_list;
+
+ return;
+
+free_arfs_fltr_list:
+ kfree(arfs_fltr_list);
+}
+
+/**
+ * ice_clear_arfs - clear the aRFS hash table and any memory used for aRFS
+ * @vsi: the VSI to be forwarded to
+ */
+void ice_clear_arfs(struct ice_vsi *vsi)
+{
+ struct device *dev;
+ unsigned int i;
+
+ if (!vsi || vsi->type != ICE_VSI_PF || !vsi->back ||
+ !vsi->arfs_fltr_list)
+ return;
+
+ dev = ice_pf_to_dev(vsi->back);
+ for (i = 0; i < ICE_MAX_ARFS_LIST; i++) {
+ struct ice_arfs_entry *r;
+ struct hlist_node *n;
+
+ spin_lock_bh(&vsi->arfs_lock);
+ hlist_for_each_entry_safe(r, n, &vsi->arfs_fltr_list[i],
+ list_entry) {
+ hlist_del(&r->list_entry);
+ devm_kfree(dev, r);
+ }
+ spin_unlock_bh(&vsi->arfs_lock);
+ }
+
+ kfree(vsi->arfs_fltr_list);
+ vsi->arfs_fltr_list = NULL;
+ kfree(vsi->arfs_last_fltr_id);
+ vsi->arfs_last_fltr_id = NULL;
+ kfree(vsi->arfs_fltr_cntrs);
+ vsi->arfs_fltr_cntrs = NULL;
+}
+
+/**
+ * ice_free_cpu_rx_rmap - free the previously set up CPU reverse map
+ * @vsi: the VSI to be forwarded to
+ */
+void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
+{
+ struct net_device *netdev;
+
+ if (!vsi || vsi->type != ICE_VSI_PF)
+ return;
+
+ netdev = vsi->netdev;
+ if (!netdev || !netdev->rx_cpu_rmap)
+ return;
+
+ free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+ netdev->rx_cpu_rmap = NULL;
+}
+
+/**
+ * ice_set_cpu_rx_rmap - set up the CPU reverse map for each queue
+ * @vsi: the VSI to be forwarded to
+ */
+int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
+{
+ struct net_device *netdev;
+ struct ice_pf *pf;
+ int base_idx, i;
+
+ if (!vsi || vsi->type != ICE_VSI_PF)
+ return 0;
+
+ pf = vsi->back;
+ netdev = vsi->netdev;
+ if (!pf || !netdev || !vsi->num_q_vectors)
+ return -EINVAL;
+
+ netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n",
+ vsi->type, netdev->name, vsi->num_q_vectors);
+
+ netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
+ if (unlikely(!netdev->rx_cpu_rmap))
+ return -EINVAL;
+
+ base_idx = vsi->base_vector;
+ ice_for_each_q_vector(vsi, i)
+ if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
+ pf->msix_entries[base_idx + i].vector)) {
+ ice_free_cpu_rx_rmap(vsi);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_remove_arfs - remove/clear all aRFS resources
+ * @pf: device private structure
+ */
+void ice_remove_arfs(struct ice_pf *pf)
+{
+ struct ice_vsi *pf_vsi;
+
+ pf_vsi = ice_get_main_vsi(pf);
+ if (!pf_vsi)
+ return;
+
+ ice_clear_arfs(pf_vsi);
+}
+
+/**
+ * ice_rebuild_arfs - remove/clear all aRFS resources and rebuild after reset
+ * @pf: device private structure
+ */
+void ice_rebuild_arfs(struct ice_pf *pf)
+{
+ struct ice_vsi *pf_vsi;
+
+ pf_vsi = ice_get_main_vsi(pf);
+ if (!pf_vsi)
+ return;
+
+ ice_remove_arfs(pf);
+ ice_init_arfs(pf_vsi);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.h b/drivers/net/ethernet/intel/ice/ice_arfs.h
new file mode 100644
index 000000000..9669ad9bf
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_ARFS_H_
+#define _ICE_ARFS_H_
+
+#include "ice_fdir.h"
+
+enum ice_arfs_fltr_state {
+ ICE_ARFS_INACTIVE,
+ ICE_ARFS_ACTIVE,
+ ICE_ARFS_TODEL,
+};
+
+struct ice_arfs_entry {
+ struct ice_fdir_fltr fltr_info;
+ struct hlist_node list_entry;
+ u64 time_activated; /* only valid for UDP flows */
+ u32 flow_id;
+ /* fltr_state = 0 - ICE_ARFS_INACTIVE:
+ * filter needs to be updated or programmed in HW.
+ * fltr_state = 1 - ICE_ARFS_ACTIVE:
+ * filter is active and programmed in HW.
+ * fltr_state = 2 - ICE_ARFS_TODEL:
+ * filter has been deleted from HW and needs to be removed from
+ * the aRFS hash table.
+ */
+ u8 fltr_state;
+};
+
+struct ice_arfs_entry_ptr {
+ struct ice_arfs_entry *arfs_entry;
+ struct hlist_node list_entry;
+};
+
+struct ice_arfs_active_fltr_cntrs {
+ atomic_t active_tcpv4_cnt;
+ atomic_t active_tcpv6_cnt;
+ atomic_t active_udpv4_cnt;
+ atomic_t active_udpv6_cnt;
+};
+
+#ifdef CONFIG_RFS_ACCEL
+int
+ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb,
+ u16 rxq_idx, u32 flow_id);
+void ice_clear_arfs(struct ice_vsi *vsi);
+void ice_free_cpu_rx_rmap(struct ice_vsi *vsi);
+void ice_init_arfs(struct ice_vsi *vsi);
+void ice_sync_arfs_fltrs(struct ice_pf *pf);
+int ice_set_cpu_rx_rmap(struct ice_vsi *vsi);
+void ice_remove_arfs(struct ice_pf *pf);
+void ice_rebuild_arfs(struct ice_pf *pf);
+bool
+ice_is_arfs_using_perfect_flow(struct ice_hw *hw,
+ enum ice_fltr_ptype flow_type);
+#else
+static inline void ice_clear_arfs(struct ice_vsi *vsi) { }
+static inline void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) { }
+static inline void ice_init_arfs(struct ice_vsi *vsi) { }
+static inline void ice_sync_arfs_fltrs(struct ice_pf *pf) { }
+static inline void ice_remove_arfs(struct ice_pf *pf) { }
+static inline void ice_rebuild_arfs(struct ice_pf *pf) { }
+
+static inline int ice_set_cpu_rx_rmap(struct ice_vsi __always_unused *vsi)
+{
+ return 0;
+}
+
+static inline int
+ice_rx_flow_steer(struct net_device __always_unused *netdev,
+ const struct sk_buff __always_unused *skb,
+ u16 __always_unused rxq_idx, u32 __always_unused flow_id)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool
+ice_is_arfs_using_perfect_flow(struct ice_hw __always_unused *hw,
+ enum ice_fltr_ptype __always_unused flow_type)
+{
+ return false;
+}
+#endif /* CONFIG_RFS_ACCEL */
+#endif /* _ICE_ARFS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
new file mode 100644
index 000000000..818eca6aa
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -0,0 +1,1019 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <net/xdp_sock_drv.h>
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
+#include "ice_sriov.h"
+
+/**
+ * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * Return 0 on success and -ENOMEM if no space is left in the PF queue bitmap
+ */
+static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
+{
+ unsigned int offset, i;
+
+ mutex_lock(qs_cfg->qs_mutex);
+ offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
+ 0, qs_cfg->q_count, 0);
+ if (offset >= qs_cfg->pf_map_size) {
+ mutex_unlock(qs_cfg->qs_mutex);
+ return -ENOMEM;
+ }
+
+ bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
+ for (i = 0; i < qs_cfg->q_count; i++)
+ qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = (u16)(i + offset);
+ mutex_unlock(qs_cfg->qs_mutex);
+
+ return 0;
+}
+
+/**
+ * __ice_vsi_get_qs_sc - Assign scattered queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * Return 0 on success and -ENOMEM if no space is left in the PF queue bitmap
+ */
+static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
+{
+ unsigned int i, index = 0;
+
+ mutex_lock(qs_cfg->qs_mutex);
+ for (i = 0; i < qs_cfg->q_count; i++) {
+ index = find_next_zero_bit(qs_cfg->pf_map,
+ qs_cfg->pf_map_size, index);
+ if (index >= qs_cfg->pf_map_size)
+ goto err_scatter;
+ set_bit(index, qs_cfg->pf_map);
+ qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = (u16)index;
+ }
+ mutex_unlock(qs_cfg->qs_mutex);
+
+ return 0;
+err_scatter:
+ for (index = 0; index < i; index++) {
+ clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
+ qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
+ }
+ mutex_unlock(qs_cfg->qs_mutex);
+
+ return -ENOMEM;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT if the requested state is not reached after multiple
+ * retries, else 0 on success.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+ int i;
+
+ for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
+ if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
+ QRX_CTRL_QENA_STAT_M))
+ return 0;
+
+ usleep_range(20, 40);
+ }
+
+ return -ETIMEDOUT;
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the VSI struct
+ *
+ * We allocate one q_vector and set the default ITR values associated
+ * with it. If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_q_vector *q_vector;
+
+ /* allocate q_vector */
+ q_vector = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*q_vector),
+ GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+ q_vector->vsi = vsi;
+ q_vector->v_idx = v_idx;
+ q_vector->tx.itr_setting = ICE_DFLT_TX_ITR;
+ q_vector->rx.itr_setting = ICE_DFLT_RX_ITR;
+ q_vector->tx.itr_mode = ITR_DYNAMIC;
+ q_vector->rx.itr_mode = ITR_DYNAMIC;
+ q_vector->tx.type = ICE_TX_CONTAINER;
+ q_vector->rx.type = ICE_RX_CONTAINER;
+
+ if (vsi->type == ICE_VSI_VF)
+ goto out;
+ /* only set affinity_mask if the CPU is online */
+ if (cpu_online(v_idx))
+ cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+ /* This will not be called in the driver load path because the netdev
+ * will not be created yet. All other cases will register the NAPI
+ * handler here (i.e. resume, reset/rebuild, etc.)
+ */
+ if (vsi->netdev)
+ netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll);
+
+out:
+ /* tie q_vector and VSI together */
+ vsi->q_vectors[v_idx] = q_vector;
+
+ return 0;
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+ struct ice_q_vector *q_vector;
+ struct ice_pf *pf = vsi->back;
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
+ if (!vsi->q_vectors[v_idx]) {
+ dev_dbg(dev, "Queue vector at index %d not found\n", v_idx);
+ return;
+ }
+ q_vector = vsi->q_vectors[v_idx];
+
+ ice_for_each_tx_ring(tx_ring, q_vector->tx)
+ tx_ring->q_vector = NULL;
+ ice_for_each_rx_ring(rx_ring, q_vector->rx)
+ rx_ring->q_vector = NULL;
+
+ /* only VSI with an associated netdev is set up with NAPI */
+ if (vsi->netdev)
+ netif_napi_del(&q_vector->napi);
+
+ devm_kfree(dev, q_vector);
+ vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
+ * @hw: board specific structure
+ */
+static void ice_cfg_itr_gran(struct ice_hw *hw)
+{
+ u32 regval = rd32(hw, GLINT_CTL);
+
+ /* no need to update global register if ITR gran is already set */
+ if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
+ (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
+ GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
+ (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
+ GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
+ (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
+ GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
+ (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
+ GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
+ return;
+
+ regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
+ GLINT_CTL_ITR_GRAN_200_M) |
+ ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
+ GLINT_CTL_ITR_GRAN_100_M) |
+ ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
+ GLINT_CTL_ITR_GRAN_50_M) |
+ ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
+ GLINT_CTL_ITR_GRAN_25_M);
+ wr32(hw, GLINT_CTL, regval);
+}
+
+/**
+ * ice_calc_txq_handle - calculate the queue handle
+ * @vsi: VSI that ring belongs to
+ * @ring: ring to get the absolute queue index
+ * @tc: traffic class number
+ */
+static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 tc)
+{
+ WARN_ONCE(ice_ring_is_xdp(ring) && tc, "XDP ring can't belong to TC other than 0\n");
+
+ if (ring->ch)
+ return ring->q_index - ring->ch->base_q;
+
+ /* The idea behind the calculation is to subtract the queue offset of
+ * the TC that the ring belongs to from the ring's absolute queue index,
+ * which yields the queue's index within that TC.
+ */
+ return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset;
+}
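+
+/* Illustrative example: if TC 2 starts at qoffset 8 and the ring's absolute
+ * q_index is 10, the handle within the TC is 10 - 8 = 2.
+ */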
+
+/**
+ * ice_eswitch_calc_txq_handle - calculate Tx queue handle on a switchdev VSI
+ * @ring: pointer to the ring whose unique index is needed
+ *
+ * Tx rings on a switchdev VSI can share the same ring->q_index across many
+ * netdevs, while hardware ring setup requires a unique q_index. Calculate
+ * one here by finding the index of this ring in vsi->tx_rings.
+ *
+ * Return ICE_INVAL_Q_INDEX when the index isn't found. This should never
+ * happen, because the VSI is taken from ring->vsi, so the ring has to be
+ * present in that VSI.
+ */
+static u16 ice_eswitch_calc_txq_handle(struct ice_tx_ring *ring)
+{
+ struct ice_vsi *vsi = ring->vsi;
+ int i;
+
+ ice_for_each_txq(vsi, i) {
+ if (vsi->tx_rings[i] == ring)
+ return i;
+ }
+
+ return ICE_INVAL_Q_INDEX;
+}
+
+/**
+ * ice_cfg_xps_tx_ring - Configure XPS for a Tx ring
+ * @ring: The Tx ring to configure
+ *
+ * This enables/disables XPS for a given Tx descriptor ring
+ * based on the TCs enabled for the VSI that ring belongs to.
+ */
+static void ice_cfg_xps_tx_ring(struct ice_tx_ring *ring)
+{
+ if (!ring->q_vector || !ring->netdev)
+ return;
+
+ /* We only initialize XPS once, so as not to overwrite user settings */
+ if (test_and_set_bit(ICE_TX_XPS_INIT_DONE, ring->xps_state))
+ return;
+
+ netif_set_xps_queue(ring->netdev, &ring->q_vector->affinity_mask,
+ ring->q_index);
+}
+
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+ struct ice_vsi *vsi = ring->vsi;
+ struct ice_hw *hw = &vsi->back->hw;
+
+ tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+ tlan_ctx->port_num = vsi->port_info->lport;
+
+ /* Transmit Queue Length */
+ tlan_ctx->qlen = ring->count;
+
+ ice_set_cgd_num(tlan_ctx, ring->dcb_tc);
+
+ /* PF number */
+ tlan_ctx->pf_num = hw->pf_id;
+
+ /* queue belongs to a specific VSI type
+ * VF / VM index should be programmed per vmvf_type setting:
+ * for vmvf_type = VF, it is VF number between 0-256
+ * for vmvf_type = VM, it is VM number between 0-767
+ * for PF or EMP this field should be set to zero
+ */
+ switch (vsi->type) {
+ case ICE_VSI_LB:
+ case ICE_VSI_CTRL:
+ case ICE_VSI_PF:
+ if (ring->ch)
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ;
+ else
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+ break;
+ case ICE_VSI_VF:
+ /* Firmware expects vmvf_num to be absolute VF ID */
+ tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf->vf_id;
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
+ break;
+ case ICE_VSI_SWITCHDEV_CTRL:
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ;
+ break;
+ default:
+ return;
+ }
+
+ /* make sure the context is associated with the right VSI */
+ if (ring->ch)
+ tlan_ctx->src_vsi = ring->ch->vsi_num;
+ else
+ tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
+
+ /* Restrict Tx timestamps to the PF VSI */
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ tlan_ctx->tsyn_ena = 1;
+ break;
+ default:
+ break;
+ }
+
+ tlan_ctx->tso_ena = ICE_TX_LEGACY;
+ tlan_ctx->tso_qnum = pf_q;
+
+ /* Legacy or Advanced Host Interface:
+ * 0: Advanced Host Interface
+ * 1: Legacy Host Interface
+ */
+ tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
+ */
+static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring)
+{
+ if (ice_ring_uses_build_skb(rx_ring))
+ return ICE_SKB_PAD;
+ else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+ return XDP_PACKET_HEADROOM;
+
+ return 0;
+}
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
+{
+ int chain_len = ICE_MAX_CHAINED_RX_BUFS;
+ struct ice_vsi *vsi = ring->vsi;
+ u32 rxdid = ICE_RXDID_FLEX_NIC;
+ struct ice_rlan_ctx rlan_ctx;
+ struct ice_hw *hw;
+ u16 pf_q;
+ int err;
+
+ hw = &vsi->back->hw;
+
+ /* which Rx queue number this is in the global space of 2K Rx queues */
+ pf_q = vsi->rxq_map[ring->q_index];
+
+ /* clear the context structure first */
+ memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
+ /* Receive Queue Base Address.
+ * Indicates the starting address of the descriptor queue defined in
+ * 128 Byte units.
+ */
+ rlan_ctx.base = ring->dma >> 7;
+
+ rlan_ctx.qlen = ring->count;
+
+ /* Receive Packet Data Buffer Size.
+ * The Packet Data Buffer Size is defined in 128 byte units.
+ */
+ rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len,
+ BIT_ULL(ICE_RLAN_CTX_DBUF_S));
+
+ /* use 32 byte descriptors */
+ rlan_ctx.dsize = 1;
+
+ /* Strip the Ethernet CRC bytes before the packet is posted to host
+ * memory.
+ */
+ rlan_ctx.crcstrip = !(ring->flags & ICE_RX_FLAGS_CRC_STRIP_DIS);
+
+ /* L2TSEL flag defines the reported L2 Tags in the receive descriptor
+ * and it needs to remain 1 for non-DVM capable configurations to not
+ * break backward compatibility for VF drivers. Setting this field to 0
+ * will cause the single/outer VLAN tag to be stripped to the L2TAG2_2ND
+ * field in the Rx descriptor. Setting it to 1 allows the VLAN tag to
+ * be stripped in L2TAG1 of the Rx descriptor, which is where VFs will
+ * check for the tag
+ */
+ if (ice_is_dvm_ena(hw))
+ if (vsi->type == ICE_VSI_VF &&
+ ice_vf_is_port_vlan_ena(vsi->vf))
+ rlan_ctx.l2tsel = 1;
+ else
+ rlan_ctx.l2tsel = 0;
+ else
+ rlan_ctx.l2tsel = 1;
+
+ rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+ rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+ rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+ /* This controls whether VLAN is stripped from inner headers
+ * The VLAN in the inner L2 header is stripped to the receive
+ * descriptor if enabled by this flag.
+ */
+ rlan_ctx.showiv = 0;
+
+ /* For AF_XDP ZC, we disallow packets to span
+ * multiple buffers, thus letting us skip that
+ * handling in the fast-path.
+ */
+ if (ring->xsk_pool)
+ chain_len = 1;
+ /* Max packet size for this queue - must not be set to a larger value
+ * than 5 x DBUF
+ */
+ rlan_ctx.rxmax = min_t(u32, vsi->max_frame,
+ chain_len * ring->rx_buf_len);
+
+ /* Rx queue threshold in units of 64 */
+ rlan_ctx.lrxqthresh = 1;
+
+ /* Enable Flexible Descriptors in the queue context, which
+ * allows this driver to select a specific receive descriptor format,
+ * increasing context priority to pick up the profile ID; the default
+ * is 0x01; setting it to 0x03 ensures the profile is programmed even
+ * if the previous context has the same priority
+ */
+ if (vsi->type != ICE_VSI_VF)
+ ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3, true);
+ else
+ ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3,
+ false);
+
+ /* Absolute queue number out of 2K needs to be passed */
+ err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+ if (err) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+ pf_q, err);
+ return -EIO;
+ }
+
+ if (vsi->type == ICE_VSI_VF)
+ return 0;
+
+ /* configure Rx buffer alignment */
+ if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+ ice_clear_ring_build_skb_ena(ring);
+ else
+ ice_set_ring_build_skb_ena(ring);
+
+ ring->rx_offset = ice_rx_offset(ring);
+
+ /* init queue specific tail register */
+ ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+ writel(0, ring->tail);
+
+ return 0;
+}
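+
+/* Illustrative example: with 2048-byte Rx buffers and the default chain
+ * length of ICE_MAX_CHAINED_RX_BUFS, rxmax above is capped at
+ * min(vsi->max_frame, 5 * 2048) bytes; with an XSK pool attached the cap
+ * drops to a single buffer.
+ */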
+
+/**
+ * ice_vsi_cfg_rxq - Configure an Rx queue
+ * @ring: the ring being configured
+ *
+ * Return 0 on success and a negative value on error.
+ */
+int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
+{
+ struct device *dev = ice_pf_to_dev(ring->vsi->back);
+ u16 num_bufs = ICE_DESC_UNUSED(ring);
+ int err;
+
+ ring->rx_buf_len = ring->vsi->rx_buf_len;
+
+ if (ring->vsi->type == ICE_VSI_PF) {
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ /* coverity[check_return] */
+ xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->q_index, ring->q_vector->napi.napi_id);
+
+ ring->xsk_pool = ice_xsk_pool(ring);
+ if (ring->xsk_pool) {
+ xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+ ring->rx_buf_len =
+ xsk_pool_get_rx_frame_size(ring->xsk_pool);
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_XSK_BUFF_POOL,
+ NULL);
+ if (err)
+ return err;
+ xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+
+ dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
+ ring->q_index);
+ } else {
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ /* coverity[check_return] */
+ xdp_rxq_info_reg(&ring->xdp_rxq,
+ ring->netdev,
+ ring->q_index, ring->q_vector->napi.napi_id);
+
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err)
+ return err;
+ }
+ }
+
+ err = ice_setup_rx_ctx(ring);
+ if (err) {
+ dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
+ ring->q_index, err);
+ return err;
+ }
+
+ if (ring->xsk_pool) {
+ bool ok;
+
+ if (!xsk_buff_can_alloc(ring->xsk_pool, num_bufs)) {
+ dev_warn(dev, "XSK buffer pool does not provide enough addresses to fill %d buffers on Rx ring %d\n",
+ num_bufs, ring->q_index);
+ dev_warn(dev, "Change Rx ring/fill queue size to avoid performance issues\n");
+
+ return 0;
+ }
+
+ ok = ice_alloc_rx_bufs_zc(ring, num_bufs);
+ if (!ok) {
+ u16 pf_q = ring->vsi->rxq_map[ring->q_index];
+
+ dev_info(dev, "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d (pf_q %d)\n",
+ ring->q_index, pf_q);
+ }
+
+ return 0;
+ }
+
+ ice_alloc_rx_bufs(ring, num_bufs);
+
+ return 0;
+}
+
+/**
+ * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * This function first tries to find contiguous space. If it is not successful,
+ * it tries with the scatter approach.
+ *
+ * Return 0 on success and -ENOMEM if no space is left in the PF queue bitmap
+ */
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
+{
+ int ret = 0;
+
+ ret = __ice_vsi_get_qs_contig(qs_cfg);
+ if (ret) {
+ /* contig failed, so try with scatter approach */
+ qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
+ qs_cfg->q_count = min_t(unsigned int, qs_cfg->q_count,
+ qs_cfg->scatter_count);
+ ret = __ice_vsi_get_qs_sc(qs_cfg);
+ }
+ return ret;
+}
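+
+/* Illustrative sketch of a caller, mirroring the Tx-side setup in
+ * ice_lib.c: the PF bitmap and VSI map are gathered before requesting
+ * queues:
+ *
+ *	struct ice_qs_cfg tx_qs_cfg = {
+ *		.qs_mutex = &pf->avail_q_mutex,
+ *		.pf_map = pf->avail_txqs,
+ *		.pf_map_size = pf->max_pf_txqs,
+ *		.q_count = vsi->alloc_txq,
+ *		.scatter_count = ICE_MAX_SCATTER_TXQS,
+ *		.vsi_map = vsi->txq_map,
+ *		.vsi_map_offset = 0,
+ *		.mapping_mode = ICE_VSI_MAP_CONTIG,
+ *	};
+ *
+ *	ret = __ice_vsi_get_qs(&tx_qs_cfg);
+ */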
+
+/**
+ * ice_vsi_ctrl_one_rx_ring - start/stop VSI's Rx ring with no busy wait
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx ring
+ * @rxq_idx: 0-based Rx queue index for the VSI passed in
+ * @wait: wait or don't wait for configuration to finish in hardware
+ *
+ * Return 0 on success and negative on error.
+ */
+int
+ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait)
+{
+ int pf_q = vsi->rxq_map[rxq_idx];
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 rx_reg;
+
+ rx_reg = rd32(hw, QRX_CTRL(pf_q));
+
+ /* Skip if the queue is already in the requested state */
+ if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+ return 0;
+
+ /* turn on/off the queue */
+ if (ena)
+ rx_reg |= QRX_CTRL_QENA_REQ_M;
+ else
+ rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+ wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+ if (!wait)
+ return 0;
+
+ ice_flush(hw);
+ return ice_pf_rxq_wait(pf, pf_q, ena);
+}
+
+/**
+ * ice_vsi_wait_one_rx_ring - wait for a VSI's Rx ring to be stopped/started
+ * @vsi: the VSI being configured
+ * @ena: true/false to verify Rx ring has been enabled/disabled respectively
+ * @rxq_idx: 0-based Rx queue index for the VSI passed in
+ *
+ * This routine will wait for the given Rx queue of the VSI to reach the
+ * enabled or disabled state. Returns -ETIMEDOUT if the requested state is
+ * not reached after multiple retries, else 0 on success.
+ */
+int ice_vsi_wait_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
+{
+ int pf_q = vsi->rxq_map[rxq_idx];
+ struct ice_pf *pf = vsi->back;
+
+ return ice_pf_rxq_wait(pf, pf_q, ena);
+}
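+
+/* Illustrative usage, mirroring ice_lib.c: callers that skip the busy-wait
+ * typically kick all rings first and poll for completion afterwards:
+ *
+ *	ice_for_each_rxq(vsi, i)
+ *		ice_vsi_ctrl_one_rx_ring(vsi, true, i, false);
+ *	ice_flush(&vsi->back->hw);
+ *	ice_for_each_rxq(vsi, i)
+ *		ret = ice_vsi_wait_one_rx_ring(vsi, true, i);
+ */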
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ u16 v_idx;
+ int err;
+
+ if (vsi->q_vectors[0]) {
+ dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num);
+ return -EEXIST;
+ }
+
+ for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) {
+ err = ice_vsi_alloc_q_vector(vsi, v_idx);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ while (v_idx--)
+ ice_free_q_vector(vsi, v_idx);
+
+ dev_err(dev, "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
+ vsi->num_q_vectors, vsi->vsi_num, err);
+ vsi->num_q_vectors = 0;
+ return err;
+}
+
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
+ */
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+{
+ int q_vectors = vsi->num_q_vectors;
+ u16 tx_rings_rem, rx_rings_rem;
+ int v_id;
+
+ /* initialize the remaining ring counts from the VSI's queue counts */
+ tx_rings_rem = vsi->num_txq;
+ rx_rings_rem = vsi->num_rxq;
+
+ for (v_id = 0; v_id < q_vectors; v_id++) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+ u8 tx_rings_per_v, rx_rings_per_v;
+ u16 q_id, q_base;
+
+ /* Tx rings mapping to vector */
+ tx_rings_per_v = (u8)DIV_ROUND_UP(tx_rings_rem,
+ q_vectors - v_id);
+ q_vector->num_ring_tx = tx_rings_per_v;
+ q_vector->tx.tx_ring = NULL;
+ q_vector->tx.itr_idx = ICE_TX_ITR;
+ q_base = vsi->num_txq - tx_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+ struct ice_tx_ring *tx_ring = vsi->tx_rings[q_id];
+
+ tx_ring->q_vector = q_vector;
+ tx_ring->next = q_vector->tx.tx_ring;
+ q_vector->tx.tx_ring = tx_ring;
+ }
+ tx_rings_rem -= tx_rings_per_v;
+
+ /* Rx rings mapping to vector */
+ rx_rings_per_v = (u8)DIV_ROUND_UP(rx_rings_rem,
+ q_vectors - v_id);
+ q_vector->num_ring_rx = rx_rings_per_v;
+ q_vector->rx.rx_ring = NULL;
+ q_vector->rx.itr_idx = ICE_RX_ITR;
+ q_base = vsi->num_rxq - rx_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+ struct ice_rx_ring *rx_ring = vsi->rx_rings[q_id];
+
+ rx_ring->q_vector = q_vector;
+ rx_ring->next = q_vector->rx.rx_ring;
+ q_vector->rx.rx_ring = rx_ring;
+ }
+ rx_rings_rem -= rx_rings_per_v;
+ }
+}
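+
+/* Illustrative example: with 9 Tx rings over 4 vectors, DIV_ROUND_UP hands
+ * out 3, 2, 2 and 2 rings as v_id advances (9/4 -> 3, then 6/3 -> 2,
+ * 4/2 -> 2, 2/1 -> 2), so earlier vectors absorb any remainder.
+ */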
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+ int v_idx;
+
+ ice_for_each_q_vector(vsi, v_idx)
+ ice_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * ice_vsi_cfg_txq - Configure single Tx queue
+ * @vsi: the VSI that queue belongs to
+ * @ring: Tx ring to be configured
+ * @qg_buf: queue group buffer
+ */
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
+ struct ice_aqc_add_tx_qgrp *qg_buf)
+{
+ u8 buf_len = struct_size(qg_buf, txqs, 1);
+ struct ice_tlan_ctx tlan_ctx = { 0 };
+ struct ice_aqc_add_txqs_perq *txq;
+ struct ice_channel *ch = ring->ch;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int status;
+ u16 pf_q;
+ u8 tc;
+
+ /* Configure XPS */
+ ice_cfg_xps_tx_ring(ring);
+
+ pf_q = ring->reg_idx;
+ ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
+ /* copy context contents into the qg_buf */
+ qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+ ice_set_ctx(hw, (u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+ ice_tlan_ctx_info);
+
+ /* init the queue-specific tail register. It is referred to as the
+ * transmit comm scheduler queue doorbell.
+ */
+ ring->tail = hw->hw_addr + QTX_COMM_DBELL(pf_q);
+
+ if (IS_ENABLED(CONFIG_DCB))
+ tc = ring->dcb_tc;
+ else
+ tc = 0;
+
+ /* Add unique software queue handle of the Tx queue per
+ * TC into the VSI Tx ring
+ */
+ if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
+ ring->q_handle = ice_eswitch_calc_txq_handle(ring);
+
+ if (ring->q_handle == ICE_INVAL_Q_INDEX)
+ return -ENODEV;
+ } else {
+ ring->q_handle = ice_calc_txq_handle(vsi, ring, tc);
+ }
+
+ if (ch)
+ status = ice_ena_vsi_txq(vsi->port_info, ch->ch_vsi->idx, 0,
+ ring->q_handle, 1, qg_buf, buf_len,
+ NULL);
+ else
+ status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc,
+ ring->q_handle, 1, qg_buf, buf_len,
+ NULL);
+ if (status) {
+ dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %d\n",
+ status);
+ return status;
+ }
+
+ /* Add Tx Queue TEID into the VSI Tx ring from the
+ * response. This will complete configuring and
+ * enabling the queue.
+ */
+ txq = &qg_buf->txqs[0];
+ if (pf_q == le16_to_cpu(txq->txq_id))
+ ring->txq_teid = le32_to_cpu(txq->q_teid);
+
+ return 0;
+}
+
+/**
+ * ice_cfg_itr - configure the initial interrupt throttle values
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector that's being configured
+ *
+ * Configure interrupt throttling values for the ring containers that are
+ * associated with the interrupt vector passed in.
+ */
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+ ice_cfg_itr_gran(hw);
+
+ if (q_vector->num_ring_rx)
+ ice_write_itr(&q_vector->rx, q_vector->rx.itr_setting);
+
+ if (q_vector->num_ring_tx)
+ ice_write_itr(&q_vector->tx, q_vector->tx.itr_setting);
+
+ ice_write_intrl(q_vector, q_vector->intrl);
+}
+
+/**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+ val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+ ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]),
+ val);
+ }
+ ice_flush(hw);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+ val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+ ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+ wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+ wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+ (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+ GLINT_DYN_CTL_SWINT_TRIG_M |
+ GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
+ * ice_vsi_stop_tx_ring - Disable single Tx ring
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @ring: Tx ring to be stopped
+ * @txq_meta: Meta data of Tx ring to be stopped
+ */
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ u16 rel_vmvf_num, struct ice_tx_ring *ring,
+ struct ice_txq_meta *txq_meta)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_q_vector *q_vector;
+ struct ice_hw *hw = &pf->hw;
+ int status;
+ u32 val;
+
+ /* clear cause_ena bit for disabled queues */
+ val = rd32(hw, QINT_TQCTL(ring->reg_idx));
+ val &= ~QINT_TQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_TQCTL(ring->reg_idx), val);
+
+ /* software is expected to wait for 100 ns */
+ ndelay(100);
+
+ /* trigger a software interrupt for the vector
+ * associated to the queue to schedule NAPI handler
+ */
+ q_vector = ring->q_vector;
+ if (q_vector && !(vsi->vf && ice_is_vf_disabled(vsi->vf)))
+ ice_trigger_sw_intr(hw, q_vector);
+
+ status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
+ txq_meta->tc, 1, &txq_meta->q_handle,
+ &txq_meta->q_id, &txq_meta->q_teid, rst_src,
+ rel_vmvf_num, NULL);
+
+ /* if the disable queue command was exercised during an
+ * active reset flow, -EBUSY is returned.
+ * This is not an error as the reset operation disables
+ * queues at the hardware level anyway.
+ */
+ if (status == -EBUSY) {
+ dev_dbg(ice_pf_to_dev(vsi->back), "Reset in progress. LAN Tx queues already disabled\n");
+ } else if (status == -ENOENT) {
+ dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n");
+ } else if (status) {
+ dev_dbg(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %d\n",
+ status);
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_fill_txq_meta - Prepare the Tx queue's meta data
+ * @vsi: VSI that ring belongs to
+ * @ring: ring that txq_meta will be based on
+ * @txq_meta: a helper struct that wraps Tx queue's information
+ *
+ * Set up a helper struct containing all the fields needed for stopping the
+ * Tx queue.
+ */
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring,
+ struct ice_txq_meta *txq_meta)
+{
+ struct ice_channel *ch = ring->ch;
+ u8 tc;
+
+ if (IS_ENABLED(CONFIG_DCB))
+ tc = ring->dcb_tc;
+ else
+ tc = 0;
+
+ txq_meta->q_id = ring->reg_idx;
+ txq_meta->q_teid = ring->txq_teid;
+ txq_meta->q_handle = ring->q_handle;
+ if (ch) {
+ txq_meta->vsi_idx = ch->ch_vsi->idx;
+ txq_meta->tc = 0;
+ } else {
+ txq_meta->vsi_idx = vsi->idx;
+ txq_meta->tc = tc;
+ }
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h
new file mode 100644
index 000000000..b67dca417
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_BASE_H_
+#define _ICE_BASE_H_
+
+#include "ice.h"
+
+int ice_vsi_cfg_rxq(struct ice_rx_ring *ring);
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
+int
+ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait);
+int ice_vsi_wait_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi);
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
+ struct ice_aqc_add_tx_qgrp *qg_buf);
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ u16 rel_vmvf_num, struct ice_tx_ring *ring,
+ struct ice_txq_meta *txq_meta);
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring,
+ struct ice_txq_meta *txq_meta);
+#endif /* _ICE_BASE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_cgu_regs.h b/drivers/net/ethernet/intel/ice/ice_cgu_regs.h
new file mode 100644
index 000000000..57abd5238
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_cgu_regs.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_CGU_REGS_H_
+#define _ICE_CGU_REGS_H_
+
+#define NAC_CGU_DWORD9 0x24
+union nac_cgu_dword9 {
+ struct {
+ u32 time_ref_freq_sel : 3;
+ u32 clk_eref1_en : 1;
+ u32 clk_eref0_en : 1;
+ u32 time_ref_en : 1;
+ u32 time_sync_en : 1;
+ u32 one_pps_out_en : 1;
+ u32 clk_ref_synce_en : 1;
+ u32 clk_synce1_en : 1;
+ u32 clk_synce0_en : 1;
+ u32 net_clk_ref1_en : 1;
+ u32 net_clk_ref0_en : 1;
+ u32 clk_synce1_amp : 2;
+ u32 misc6 : 1;
+ u32 clk_synce0_amp : 2;
+ u32 one_pps_out_amp : 2;
+ u32 misc24 : 12;
+ } field;
+ u32 val;
+};
+
+#define NAC_CGU_DWORD19 0x4c
+union nac_cgu_dword19 {
+ struct {
+ u32 tspll_fbdiv_intgr : 8;
+ u32 fdpll_ulck_thr : 5;
+ u32 misc15 : 3;
+ u32 tspll_ndivratio : 4;
+ u32 tspll_iref_ndivratio : 3;
+ u32 misc19 : 1;
+ u32 japll_ndivratio : 4;
+ u32 japll_iref_ndivratio : 3;
+ u32 misc27 : 1;
+ } field;
+ u32 val;
+};
+
+#define NAC_CGU_DWORD22 0x58
+union nac_cgu_dword22 {
+ struct {
+ u32 fdpll_frac_div_out_nc : 2;
+ u32 fdpll_lock_int_for : 1;
+ u32 synce_hdov_int_for : 1;
+ u32 synce_lock_int_for : 1;
+ u32 fdpll_phlead_slip_nc : 1;
+ u32 fdpll_acc1_ovfl_nc : 1;
+ u32 fdpll_acc2_ovfl_nc : 1;
+ u32 synce_status_nc : 6;
+ u32 fdpll_acc1f_ovfl : 1;
+ u32 misc18 : 1;
+ u32 fdpllclk_div : 4;
+ u32 time1588clk_div : 4;
+ u32 synceclk_div : 4;
+ u32 synceclk_sel_div2 : 1;
+ u32 fdpllclk_sel_div2 : 1;
+ u32 time1588clk_sel_div2 : 1;
+ u32 misc3 : 1;
+ } field;
+ u32 val;
+};
+
+#define NAC_CGU_DWORD24 0x60
+union nac_cgu_dword24 {
+ struct {
+ u32 tspll_fbdiv_frac : 22;
+ u32 misc20 : 2;
+ u32 ts_pll_enable : 1;
+ u32 time_sync_tspll_align_sel : 1;
+ u32 ext_synce_sel : 1;
+ u32 ref1588_ck_div : 4;
+ u32 time_ref_sel : 1;
+ } field;
+ u32 val;
+};
+
+#define TSPLL_CNTR_BIST_SETTINGS 0x344
+union tspll_cntr_bist_settings {
+ struct {
+ u32 i_irefgen_settling_time_cntr_7_0 : 8;
+ u32 i_irefgen_settling_time_ro_standby_1_0 : 2;
+ u32 reserved195 : 5;
+ u32 i_plllock_sel_0 : 1;
+ u32 i_plllock_sel_1 : 1;
+ u32 i_plllock_cnt_6_0 : 7;
+ u32 i_plllock_cnt_10_7 : 4;
+ u32 reserved200 : 4;
+ } field;
+ u32 val;
+};
+
+#define TSPLL_RO_BWM_LF 0x370
+union tspll_ro_bwm_lf {
+ struct {
+ u32 bw_freqov_high_cri_7_0 : 8;
+ u32 bw_freqov_high_cri_9_8 : 2;
+ u32 biascaldone_cri : 1;
+ u32 plllock_gain_tran_cri : 1;
+ u32 plllock_true_lock_cri : 1;
+ u32 pllunlock_flag_cri : 1;
+ u32 afcerr_cri : 1;
+ u32 afcdone_cri : 1;
+ u32 feedfwrdgain_cal_cri_7_0 : 8;
+ u32 m2fbdivmod_cri_7_0 : 8;
+ } field;
+ u32 val;
+};
+
+#endif /* _ICE_CGU_REGS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
new file mode 100644
index 000000000..039342a0e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -0,0 +1,5514 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_sched.h"
+#include "ice_adminq_cmd.h"
+#include "ice_flow.h"
+
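+/* upper bound on polling iterations while waiting for a reset to complete */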
+#define ICE_PF_RESET_WAIT_COUNT 300
+
+static const char * const ice_link_mode_str_low[] = {
+ [0] = "100BASE_TX",
+ [1] = "100M_SGMII",
+ [2] = "1000BASE_T",
+ [3] = "1000BASE_SX",
+ [4] = "1000BASE_LX",
+ [5] = "1000BASE_KX",
+ [6] = "1G_SGMII",
+ [7] = "2500BASE_T",
+ [8] = "2500BASE_X",
+ [9] = "2500BASE_KX",
+ [10] = "5GBASE_T",
+ [11] = "5GBASE_KR",
+ [12] = "10GBASE_T",
+ [13] = "10G_SFI_DA",
+ [14] = "10GBASE_SR",
+ [15] = "10GBASE_LR",
+ [16] = "10GBASE_KR_CR1",
+ [17] = "10G_SFI_AOC_ACC",
+ [18] = "10G_SFI_C2C",
+ [19] = "25GBASE_T",
+ [20] = "25GBASE_CR",
+ [21] = "25GBASE_CR_S",
+ [22] = "25GBASE_CR1",
+ [23] = "25GBASE_SR",
+ [24] = "25GBASE_LR",
+ [25] = "25GBASE_KR",
+ [26] = "25GBASE_KR_S",
+ [27] = "25GBASE_KR1",
+ [28] = "25G_AUI_AOC_ACC",
+ [29] = "25G_AUI_C2C",
+ [30] = "40GBASE_CR4",
+ [31] = "40GBASE_SR4",
+ [32] = "40GBASE_LR4",
+ [33] = "40GBASE_KR4",
+ [34] = "40G_XLAUI_AOC_ACC",
+ [35] = "40G_XLAUI",
+ [36] = "50GBASE_CR2",
+ [37] = "50GBASE_SR2",
+ [38] = "50GBASE_LR2",
+ [39] = "50GBASE_KR2",
+ [40] = "50G_LAUI2_AOC_ACC",
+ [41] = "50G_LAUI2",
+ [42] = "50G_AUI2_AOC_ACC",
+ [43] = "50G_AUI2",
+ [44] = "50GBASE_CP",
+ [45] = "50GBASE_SR",
+ [46] = "50GBASE_FR",
+ [47] = "50GBASE_LR",
+ [48] = "50GBASE_KR_PAM4",
+ [49] = "50G_AUI1_AOC_ACC",
+ [50] = "50G_AUI1",
+ [51] = "100GBASE_CR4",
+ [52] = "100GBASE_SR4",
+ [53] = "100GBASE_LR4",
+ [54] = "100GBASE_KR4",
+ [55] = "100G_CAUI4_AOC_ACC",
+ [56] = "100G_CAUI4",
+ [57] = "100G_AUI4_AOC_ACC",
+ [58] = "100G_AUI4",
+ [59] = "100GBASE_CR_PAM4",
+ [60] = "100GBASE_KR_PAM4",
+ [61] = "100GBASE_CP2",
+ [62] = "100GBASE_SR2",
+ [63] = "100GBASE_DR",
+};
+
+static const char * const ice_link_mode_str_high[] = {
+ [0] = "100GBASE_KR2_PAM4",
+ [1] = "100G_CAUI2_AOC_ACC",
+ [2] = "100G_CAUI2",
+ [3] = "100G_AUI2_AOC_ACC",
+ [4] = "100G_AUI2",
+};
+
+/**
+ * ice_dump_phy_type - helper function to dump phy_type
+ * @hw: pointer to the HW structure
+ * @low: 64 bit value for phy_type_low
+ * @high: 64 bit value for phy_type_high
+ * @prefix: prefix string to differentiate multiple dumps
+ */
+static void
+ice_dump_phy_type(struct ice_hw *hw, u64 low, u64 high, const char *prefix)
+{
+ ice_debug(hw, ICE_DBG_PHY, "%s: phy_type_low: 0x%016llx\n", prefix, low);
+
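+	/* walk all 64 bits of the mask and name each link mode that is set */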
+ for (u32 i = 0; i < BITS_PER_TYPE(typeof(low)); i++) {
+ if (low & BIT_ULL(i))
+ ice_debug(hw, ICE_DBG_PHY, "%s: bit(%d): %s\n",
+ prefix, i, ice_link_mode_str_low[i]);
+ }
+
+ ice_debug(hw, ICE_DBG_PHY, "%s: phy_type_high: 0x%016llx\n", prefix, high);
+
+ for (u32 i = 0; i < BITS_PER_TYPE(typeof(high)); i++) {
+ if (high & BIT_ULL(i))
+ ice_debug(hw, ICE_DBG_PHY, "%s: bit(%d): %s\n",
+ prefix, i, ice_link_mode_str_high[i]);
+ }
+}
+
+/**
+ * ice_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the MAC type of the adapter based on the
+ * vendor ID and device ID stored in the HW structure.
+ */
+static int ice_set_mac_type(struct ice_hw *hw)
+{
+ if (hw->vendor_id != PCI_VENDOR_ID_INTEL)
+ return -ENODEV;
+
+ switch (hw->device_id) {
+ case ICE_DEV_ID_E810C_BACKPLANE:
+ case ICE_DEV_ID_E810C_QSFP:
+ case ICE_DEV_ID_E810C_SFP:
+ case ICE_DEV_ID_E810_XXV_BACKPLANE:
+ case ICE_DEV_ID_E810_XXV_QSFP:
+ case ICE_DEV_ID_E810_XXV_SFP:
+ hw->mac_type = ICE_MAC_E810;
+ break;
+ case ICE_DEV_ID_E823C_10G_BASE_T:
+ case ICE_DEV_ID_E823C_BACKPLANE:
+ case ICE_DEV_ID_E823C_QSFP:
+ case ICE_DEV_ID_E823C_SFP:
+ case ICE_DEV_ID_E823C_SGMII:
+ case ICE_DEV_ID_E822C_10G_BASE_T:
+ case ICE_DEV_ID_E822C_BACKPLANE:
+ case ICE_DEV_ID_E822C_QSFP:
+ case ICE_DEV_ID_E822C_SFP:
+ case ICE_DEV_ID_E822C_SGMII:
+ case ICE_DEV_ID_E822L_10G_BASE_T:
+ case ICE_DEV_ID_E822L_BACKPLANE:
+ case ICE_DEV_ID_E822L_SFP:
+ case ICE_DEV_ID_E822L_SGMII:
+ case ICE_DEV_ID_E823L_10G_BASE_T:
+ case ICE_DEV_ID_E823L_1GBE:
+ case ICE_DEV_ID_E823L_BACKPLANE:
+ case ICE_DEV_ID_E823L_QSFP:
+ case ICE_DEV_ID_E823L_SFP:
+ hw->mac_type = ICE_MAC_GENERIC;
+ break;
+ default:
+ hw->mac_type = ICE_MAC_UNKNOWN;
+ break;
+ }
+
+ ice_debug(hw, ICE_DBG_INIT, "mac_type: %d\n", hw->mac_type);
+ return 0;
+}
+
+/**
+ * ice_is_e810
+ * @hw: pointer to the hardware structure
+ *
+ * Returns true if the device is E810 based, false if not.
+ */
+bool ice_is_e810(struct ice_hw *hw)
+{
+ return hw->mac_type == ICE_MAC_E810;
+}
+
+/**
+ * ice_is_e810t
+ * @hw: pointer to the hardware structure
+ *
+ * Returns true if the device is E810T based, false if not.
+ */
+bool ice_is_e810t(struct ice_hw *hw)
+{
+ switch (hw->device_id) {
+ case ICE_DEV_ID_E810C_SFP:
+ switch (hw->subsystem_device_id) {
+ case ICE_SUBDEV_ID_E810T:
+ case ICE_SUBDEV_ID_E810T2:
+ case ICE_SUBDEV_ID_E810T3:
+ case ICE_SUBDEV_ID_E810T4:
+ case ICE_SUBDEV_ID_E810T6:
+ case ICE_SUBDEV_ID_E810T7:
+ return true;
+ }
+ break;
+ case ICE_DEV_ID_E810C_QSFP:
+ switch (hw->subsystem_device_id) {
+ case ICE_SUBDEV_ID_E810T2:
+ case ICE_SUBDEV_ID_E810T3:
+ case ICE_SUBDEV_ID_E810T5:
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/**
+ * ice_clear_pf_cfg - Clear PF configuration
+ * @hw: pointer to the hardware structure
+ *
+ * Clears any existing PF configuration (VSIs, VSI lists, switch rules, port
+ * configuration, flow director filters, etc.).
+ */
+int ice_clear_pf_cfg(struct ice_hw *hw)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pf_cfg);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_manage_mac_read - manage MAC address read command
+ * @hw: pointer to the HW struct
+ * @buf: a virtual buffer to hold the manage MAC read response
+ * @buf_size: Size of the virtual buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function is used to return the per-PF station MAC address (0x0107).
+ * NOTE: Upon successful completion of this command, MAC address information
+ * is returned in the user-specified buffer, which should be interpreted as a
+ * "manage_mac_read" response.
+ * The returned MAC addresses are also stored in the HW struct (port.mac).
+ * ice_discover_dev_caps is expected to be called before this function.
+ */
+static int
+ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_manage_mac_read_resp *resp;
+ struct ice_aqc_manage_mac_read *cmd;
+ struct ice_aq_desc desc;
+ int status;
+ u16 flags;
+ u8 i;
+
+ cmd = &desc.params.mac_read;
+
+ if (buf_size < sizeof(*resp))
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_read);
+
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (status)
+ return status;
+
+ resp = buf;
+ flags = le16_to_cpu(cmd->flags) & ICE_AQC_MAN_MAC_READ_M;
+
+ if (!(flags & ICE_AQC_MAN_MAC_LAN_ADDR_VALID)) {
+ ice_debug(hw, ICE_DBG_LAN, "got invalid MAC address\n");
+ return -EIO;
+ }
+
+ /* A single port can report up to two (LAN and WoL) addresses */
+ for (i = 0; i < cmd->num_addr; i++)
+ if (resp[i].addr_type == ICE_AQC_MAN_MAC_ADDR_TYPE_LAN) {
+ ether_addr_copy(hw->port_info->mac.lan_addr,
+ resp[i].mac_addr);
+ ether_addr_copy(hw->port_info->mac.perm_addr,
+ resp[i].mac_addr);
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_aq_get_phy_caps - returns PHY capabilities
+ * @pi: port information structure
+ * @qual_mods: report qualified modules
+ * @report_mode: report mode capabilities
+ * @pcaps: structure for PHY capabilities to be filled
+ * @cd: pointer to command details structure or NULL
+ *
+ * Returns the various PHY capabilities supported on the Port (0x0600)
+ */
+int
+ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
+ struct ice_aqc_get_phy_caps_data *pcaps,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_phy_caps *cmd;
+ u16 pcaps_size = sizeof(*pcaps);
+ struct ice_aq_desc desc;
+ const char *prefix;
+ struct ice_hw *hw;
+ int status;
+
+ cmd = &desc.params.get_phy;
+
+ if (!pcaps || (report_mode & ~ICE_AQC_REPORT_MODE_M) || !pi)
+ return -EINVAL;
+ hw = pi->hw;
+
+ if (report_mode == ICE_AQC_REPORT_DFLT_CFG &&
+ !ice_fw_supports_report_dflt_cfg(hw))
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps);
+
+ if (qual_mods)
+ cmd->param0 |= cpu_to_le16(ICE_AQC_GET_PHY_RQM);
+
+ cmd->param0 |= cpu_to_le16(report_mode);
+ status = ice_aq_send_cmd(hw, &desc, pcaps, pcaps_size, cd);
+
+ ice_debug(hw, ICE_DBG_LINK, "get phy caps dump\n");
+
+ switch (report_mode) {
+ case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
+ prefix = "phy_caps_media";
+ break;
+ case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
+ prefix = "phy_caps_no_media";
+ break;
+ case ICE_AQC_REPORT_ACTIVE_CFG:
+ prefix = "phy_caps_active";
+ break;
+ case ICE_AQC_REPORT_DFLT_CFG:
+ prefix = "phy_caps_default";
+ break;
+ default:
+ prefix = "phy_caps_invalid";
+ }
+
+ ice_dump_phy_type(hw, le64_to_cpu(pcaps->phy_type_low),
+ le64_to_cpu(pcaps->phy_type_high), prefix);
+
+ ice_debug(hw, ICE_DBG_LINK, "%s: report_mode = 0x%x\n",
+ prefix, report_mode);
+ ice_debug(hw, ICE_DBG_LINK, "%s: caps = 0x%x\n", prefix, pcaps->caps);
+ ice_debug(hw, ICE_DBG_LINK, "%s: low_power_ctrl_an = 0x%x\n", prefix,
+ pcaps->low_power_ctrl_an);
+ ice_debug(hw, ICE_DBG_LINK, "%s: eee_cap = 0x%x\n", prefix,
+ pcaps->eee_cap);
+ ice_debug(hw, ICE_DBG_LINK, "%s: eeer_value = 0x%x\n", prefix,
+ pcaps->eeer_value);
+ ice_debug(hw, ICE_DBG_LINK, "%s: link_fec_options = 0x%x\n", prefix,
+ pcaps->link_fec_options);
+ ice_debug(hw, ICE_DBG_LINK, "%s: module_compliance_enforcement = 0x%x\n",
+ prefix, pcaps->module_compliance_enforcement);
+ ice_debug(hw, ICE_DBG_LINK, "%s: extended_compliance_code = 0x%x\n",
+ prefix, pcaps->extended_compliance_code);
+ ice_debug(hw, ICE_DBG_LINK, "%s: module_type[0] = 0x%x\n", prefix,
+ pcaps->module_type[0]);
+ ice_debug(hw, ICE_DBG_LINK, "%s: module_type[1] = 0x%x\n", prefix,
+ pcaps->module_type[1]);
+ ice_debug(hw, ICE_DBG_LINK, "%s: module_type[2] = 0x%x\n", prefix,
+ pcaps->module_type[2]);
+
+ if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP_MEDIA) {
+ pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low);
+ pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high);
+ memcpy(pi->phy.link_info.module_type, &pcaps->module_type,
+ sizeof(pi->phy.link_info.module_type));
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_get_link_topo_handle - get link topology node return status
+ * @pi: port information structure
+ * @node_type: requested node type
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get link topology node return status for specified node type (0x06E0)
+ *
+ * The cage node type can be used to determine whether a cage is present. If
+ * the AQC returns an error (ENOENT), no cage is present; the connection type
+ * is then backplane or BASE-T.
+ */
+static int
+ice_aq_get_link_topo_handle(struct ice_port_info *pi, u8 node_type,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_link_topo *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.get_link_topo;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
+
+ cmd->addr.topo_params.node_type_ctx =
+ (ICE_AQC_LINK_TOPO_NODE_CTX_PORT <<
+ ICE_AQC_LINK_TOPO_NODE_CTX_S);
+
+ /* set node type */
+ cmd->addr.topo_params.node_type_ctx |=
+ (ICE_AQC_LINK_TOPO_NODE_TYPE_M & node_type);
+
+ return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_is_media_cage_present
+ * @pi: port information structure
+ *
+ * Returns true if media cage is present, else false. If no cage, then
+ * media type is backplane or BASE-T.
+ */
+static bool ice_is_media_cage_present(struct ice_port_info *pi)
+{
+	/* The cage node type can be used to determine whether a cage is
+	 * present. If the AQC returns an error (ENOENT), no cage is present;
+	 * the connection type is then backplane or BASE-T.
+	 */
+ return !ice_aq_get_link_topo_handle(pi,
+ ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE,
+ NULL);
+}
+
+/**
+ * ice_get_media_type - Gets media type
+ * @pi: port information structure
+ */
+static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
+{
+ struct ice_link_status *hw_link_info;
+
+ if (!pi)
+ return ICE_MEDIA_UNKNOWN;
+
+ hw_link_info = &pi->phy.link_info;
+ if (hw_link_info->phy_type_low && hw_link_info->phy_type_high)
+ /* If more than one media type is selected, report unknown */
+ return ICE_MEDIA_UNKNOWN;
+
+ if (hw_link_info->phy_type_low) {
+		/* 1G SGMII is a special case where some DA cable PHYs
+		 * may show this as an option when it really shouldn't
+		 * be, since SGMII is meant to be used between a MAC and
+		 * a PHY in a backplane. Try to detect this case and
+		 * handle it.
+		 */
+ if (hw_link_info->phy_type_low == ICE_PHY_TYPE_LOW_1G_SGMII &&
+ (hw_link_info->module_type[ICE_AQC_MOD_TYPE_IDENT] ==
+ ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE ||
+ hw_link_info->module_type[ICE_AQC_MOD_TYPE_IDENT] ==
+ ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE))
+ return ICE_MEDIA_DA;
+
+ switch (hw_link_info->phy_type_low) {
+ case ICE_PHY_TYPE_LOW_1000BASE_SX:
+ case ICE_PHY_TYPE_LOW_1000BASE_LX:
+ case ICE_PHY_TYPE_LOW_10GBASE_SR:
+ case ICE_PHY_TYPE_LOW_10GBASE_LR:
+ case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
+ case ICE_PHY_TYPE_LOW_25GBASE_SR:
+ case ICE_PHY_TYPE_LOW_25GBASE_LR:
+ case ICE_PHY_TYPE_LOW_40GBASE_SR4:
+ case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+ case ICE_PHY_TYPE_LOW_50GBASE_SR2:
+ case ICE_PHY_TYPE_LOW_50GBASE_LR2:
+ case ICE_PHY_TYPE_LOW_50GBASE_SR:
+ case ICE_PHY_TYPE_LOW_50GBASE_FR:
+ case ICE_PHY_TYPE_LOW_50GBASE_LR:
+ case ICE_PHY_TYPE_LOW_100GBASE_SR4:
+ case ICE_PHY_TYPE_LOW_100GBASE_LR4:
+ case ICE_PHY_TYPE_LOW_100GBASE_SR2:
+ case ICE_PHY_TYPE_LOW_100GBASE_DR:
+ case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
+ return ICE_MEDIA_FIBER;
+ case ICE_PHY_TYPE_LOW_100BASE_TX:
+ case ICE_PHY_TYPE_LOW_1000BASE_T:
+ case ICE_PHY_TYPE_LOW_2500BASE_T:
+ case ICE_PHY_TYPE_LOW_5GBASE_T:
+ case ICE_PHY_TYPE_LOW_10GBASE_T:
+ case ICE_PHY_TYPE_LOW_25GBASE_T:
+ return ICE_MEDIA_BASET;
+ case ICE_PHY_TYPE_LOW_10G_SFI_DA:
+ case ICE_PHY_TYPE_LOW_25GBASE_CR:
+ case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
+ case ICE_PHY_TYPE_LOW_25GBASE_CR1:
+ case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+ case ICE_PHY_TYPE_LOW_50GBASE_CR2:
+ case ICE_PHY_TYPE_LOW_50GBASE_CP:
+ case ICE_PHY_TYPE_LOW_100GBASE_CR4:
+ case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
+ case ICE_PHY_TYPE_LOW_100GBASE_CP2:
+ return ICE_MEDIA_DA;
+ case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+ case ICE_PHY_TYPE_LOW_40G_XLAUI:
+ case ICE_PHY_TYPE_LOW_50G_LAUI2:
+ case ICE_PHY_TYPE_LOW_50G_AUI2:
+ case ICE_PHY_TYPE_LOW_50G_AUI1:
+ case ICE_PHY_TYPE_LOW_100G_AUI4:
+ case ICE_PHY_TYPE_LOW_100G_CAUI4:
+ if (ice_is_media_cage_present(pi))
+ return ICE_MEDIA_DA;
+ fallthrough;
+ case ICE_PHY_TYPE_LOW_1000BASE_KX:
+ case ICE_PHY_TYPE_LOW_2500BASE_KX:
+ case ICE_PHY_TYPE_LOW_2500BASE_X:
+ case ICE_PHY_TYPE_LOW_5GBASE_KR:
+ case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
+ case ICE_PHY_TYPE_LOW_25GBASE_KR:
+ case ICE_PHY_TYPE_LOW_25GBASE_KR1:
+ case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
+ case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+ case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
+ case ICE_PHY_TYPE_LOW_50GBASE_KR2:
+ case ICE_PHY_TYPE_LOW_100GBASE_KR4:
+ case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
+ return ICE_MEDIA_BACKPLANE;
+ }
+ } else {
+ switch (hw_link_info->phy_type_high) {
+ case ICE_PHY_TYPE_HIGH_100G_AUI2:
+ case ICE_PHY_TYPE_HIGH_100G_CAUI2:
+ if (ice_is_media_cage_present(pi))
+ return ICE_MEDIA_DA;
+ fallthrough;
+ case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
+ return ICE_MEDIA_BACKPLANE;
+ case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
+ case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
+ return ICE_MEDIA_FIBER;
+ }
+ }
+ return ICE_MEDIA_UNKNOWN;
+}
+
+/**
+ * ice_aq_get_link_info
+ * @pi: port information structure
+ * @ena_lse: enable/disable LinkStatusEvent reporting
+ * @link: pointer to link status structure - optional
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get Link Status (0x607). Returns the link status of the adapter.
+ */
+int
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+ struct ice_link_status *link, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_link_status_data link_data = { 0 };
+ struct ice_aqc_get_link_status *resp;
+ struct ice_link_status *li_old, *li;
+ enum ice_media_type *hw_media_type;
+ struct ice_fc_info *hw_fc_info;
+ bool tx_pause, rx_pause;
+ struct ice_aq_desc desc;
+ struct ice_hw *hw;
+ u16 cmd_flags;
+ int status;
+
+ if (!pi)
+ return -EINVAL;
+ hw = pi->hw;
+ li_old = &pi->phy.link_info_old;
+ hw_media_type = &pi->phy.media_type;
+ li = &pi->phy.link_info;
+ hw_fc_info = &pi->fc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
+ cmd_flags = (ena_lse) ? ICE_AQ_LSE_ENA : ICE_AQ_LSE_DIS;
+ resp = &desc.params.get_link_status;
+ resp->cmd_flags = cpu_to_le16(cmd_flags);
+ resp->lport_num = pi->lport;
+
+ status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), cd);
+
+ if (status)
+ return status;
+
+ /* save off old link status information */
+ *li_old = *li;
+
+ /* update current link status information */
+ li->link_speed = le16_to_cpu(link_data.link_speed);
+ li->phy_type_low = le64_to_cpu(link_data.phy_type_low);
+ li->phy_type_high = le64_to_cpu(link_data.phy_type_high);
+ *hw_media_type = ice_get_media_type(pi);
+ li->link_info = link_data.link_info;
+ li->link_cfg_err = link_data.link_cfg_err;
+ li->an_info = link_data.an_info;
+ li->ext_info = link_data.ext_info;
+ li->max_frame_size = le16_to_cpu(link_data.max_frame_size);
+ li->fec_info = link_data.cfg & ICE_AQ_FEC_MASK;
+ li->topo_media_conflict = link_data.topo_media_conflict;
+ li->pacing = link_data.cfg & (ICE_AQ_CFG_PACING_M |
+ ICE_AQ_CFG_PACING_TYPE_M);
+
+ /* update fc info */
+ tx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_TX);
+ rx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_RX);
+ if (tx_pause && rx_pause)
+ hw_fc_info->current_mode = ICE_FC_FULL;
+ else if (tx_pause)
+ hw_fc_info->current_mode = ICE_FC_TX_PAUSE;
+ else if (rx_pause)
+ hw_fc_info->current_mode = ICE_FC_RX_PAUSE;
+ else
+ hw_fc_info->current_mode = ICE_FC_NONE;
+
+ li->lse_ena = !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED));
+
+ ice_debug(hw, ICE_DBG_LINK, "get link info\n");
+ ice_debug(hw, ICE_DBG_LINK, " link_speed = 0x%x\n", li->link_speed);
+ ice_debug(hw, ICE_DBG_LINK, " phy_type_low = 0x%llx\n",
+ (unsigned long long)li->phy_type_low);
+ ice_debug(hw, ICE_DBG_LINK, " phy_type_high = 0x%llx\n",
+ (unsigned long long)li->phy_type_high);
+ ice_debug(hw, ICE_DBG_LINK, " media_type = 0x%x\n", *hw_media_type);
+ ice_debug(hw, ICE_DBG_LINK, " link_info = 0x%x\n", li->link_info);
+ ice_debug(hw, ICE_DBG_LINK, " link_cfg_err = 0x%x\n", li->link_cfg_err);
+ ice_debug(hw, ICE_DBG_LINK, " an_info = 0x%x\n", li->an_info);
+ ice_debug(hw, ICE_DBG_LINK, " ext_info = 0x%x\n", li->ext_info);
+ ice_debug(hw, ICE_DBG_LINK, " fec_info = 0x%x\n", li->fec_info);
+ ice_debug(hw, ICE_DBG_LINK, " lse_ena = 0x%x\n", li->lse_ena);
+ ice_debug(hw, ICE_DBG_LINK, " max_frame = 0x%x\n",
+ li->max_frame_size);
+ ice_debug(hw, ICE_DBG_LINK, " pacing = 0x%x\n", li->pacing);
+
+ /* save link status information */
+ if (link)
+ *link = *li;
+
+ /* flag cleared so calling functions don't call AQ again */
+ pi->phy.get_link_info = false;
+
+ return 0;
+}
+
+/**
+ * ice_fill_tx_timer_and_fc_thresh
+ * @hw: pointer to the HW struct
+ * @cmd: pointer to MAC cfg structure
+ *
+ * Add Tx timer and FC refresh threshold info to Set MAC Config AQ command
+ * descriptor
+ */
+static void
+ice_fill_tx_timer_and_fc_thresh(struct ice_hw *hw,
+ struct ice_aqc_set_mac_cfg *cmd)
+{
+ u16 fc_thres_val, tx_timer_val;
+ u32 val;
+
+ /* We read back the transmit timer and FC threshold value of
+ * LFC. Thus, we will use index =
+ * PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX.
+ *
+ * Also, because we are operating on transmit timer and FC
+ * threshold of LFC, we don't turn on any bit in tx_tmr_priority
+ */
+#define IDX_OF_LFC PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX
+
+ /* Retrieve the transmit timer */
+ val = rd32(hw, PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA(IDX_OF_LFC));
+ tx_timer_val = val &
+ PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_M;
+ cmd->tx_tmr_value = cpu_to_le16(tx_timer_val);
+
+ /* Retrieve the FC threshold */
+ val = rd32(hw, PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(IDX_OF_LFC));
+ fc_thres_val = val & PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_M;
+
+ cmd->fc_refresh_threshold = cpu_to_le16(fc_thres_val);
+}
+
+/**
+ * ice_aq_set_mac_cfg
+ * @hw: pointer to the HW struct
+ * @max_frame_size: Maximum Frame Size to be supported
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set MAC configuration (0x0603)
+ */
+int
+ice_aq_set_mac_cfg(struct ice_hw *hw, u16 max_frame_size, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_set_mac_cfg *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.set_mac_cfg;
+
+ if (max_frame_size == 0)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_cfg);
+
+ cmd->max_frame_size = cpu_to_le16(max_frame_size);
+
+ ice_fill_tx_timer_and_fc_thresh(hw, cmd);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_init_fltr_mgmt_struct - initializes filter management list and locks
+ * @hw: pointer to the HW struct
+ */
+static int ice_init_fltr_mgmt_struct(struct ice_hw *hw)
+{
+ struct ice_switch_info *sw;
+ int status;
+
+ hw->switch_info = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*hw->switch_info), GFP_KERNEL);
+ sw = hw->switch_info;
+
+ if (!sw)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&sw->vsi_list_map_head);
+ sw->prof_res_bm_init = 0;
+
+ status = ice_init_def_sw_recp(hw);
+ if (status) {
+ devm_kfree(ice_hw_to_dev(hw), hw->switch_info);
+ return status;
+ }
+ return 0;
+}
+
+/**
+ * ice_cleanup_fltr_mgmt_struct - cleanup filter management list and locks
+ * @hw: pointer to the HW struct
+ */
+static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_vsi_list_map_info *v_pos_map;
+ struct ice_vsi_list_map_info *v_tmp_map;
+ struct ice_sw_recipe *recps;
+ u8 i;
+
+ list_for_each_entry_safe(v_pos_map, v_tmp_map, &sw->vsi_list_map_head,
+ list_entry) {
+ list_del(&v_pos_map->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), v_pos_map);
+ }
+ recps = sw->recp_list;
+ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
+ struct ice_recp_grp_entry *rg_entry, *tmprg_entry;
+
+ recps[i].root_rid = i;
+ list_for_each_entry_safe(rg_entry, tmprg_entry,
+ &recps[i].rg_list, l_entry) {
+ list_del(&rg_entry->l_entry);
+ devm_kfree(ice_hw_to_dev(hw), rg_entry);
+ }
+
+ if (recps[i].adv_rule) {
+ struct ice_adv_fltr_mgmt_list_entry *tmp_entry;
+ struct ice_adv_fltr_mgmt_list_entry *lst_itr;
+
+ mutex_destroy(&recps[i].filt_rule_lock);
+ list_for_each_entry_safe(lst_itr, tmp_entry,
+ &recps[i].filt_rules,
+ list_entry) {
+ list_del(&lst_itr->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), lst_itr->lkups);
+ devm_kfree(ice_hw_to_dev(hw), lst_itr);
+ }
+ } else {
+ struct ice_fltr_mgmt_list_entry *lst_itr, *tmp_entry;
+
+ mutex_destroy(&recps[i].filt_rule_lock);
+ list_for_each_entry_safe(lst_itr, tmp_entry,
+ &recps[i].filt_rules,
+ list_entry) {
+ list_del(&lst_itr->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), lst_itr);
+ }
+ }
+ if (recps[i].root_buf)
+ devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf);
+ }
+ ice_rm_all_sw_replay_rule_info(hw);
+ devm_kfree(ice_hw_to_dev(hw), sw->recp_list);
+ devm_kfree(ice_hw_to_dev(hw), sw);
+}
+
+/**
+ * ice_get_fw_log_cfg - get FW logging configuration
+ * @hw: pointer to the HW struct
+ */
+static int ice_get_fw_log_cfg(struct ice_hw *hw)
+{
+ struct ice_aq_desc desc;
+ __le16 *config;
+ int status;
+ u16 size;
+
+ size = sizeof(*config) * ICE_AQC_FW_LOG_ID_MAX;
+ config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL);
+ if (!config)
+ return -ENOMEM;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging_info);
+
+ status = ice_aq_send_cmd(hw, &desc, config, size, NULL);
+ if (!status) {
+ u16 i;
+
+ /* Save FW logging information into the HW structure */
+ for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) {
+ u16 v, m, flgs;
+
+ v = le16_to_cpu(config[i]);
+ m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S;
+ flgs = (v & ICE_AQC_FW_LOG_EN_M) >> ICE_AQC_FW_LOG_EN_S;
+
+ if (m < ICE_AQC_FW_LOG_ID_MAX)
+ hw->fw_log.evnts[m].cur = flgs;
+ }
+ }
+
+ devm_kfree(ice_hw_to_dev(hw), config);
+
+ return status;
+}
+
+/**
+ * ice_cfg_fw_log - configure FW logging
+ * @hw: pointer to the HW struct
+ * @enable: enable certain FW logging events if true, disable all if false
+ *
+ * This function enables/disables the FW logging via Rx CQ events and a UART
+ * port based on predetermined configurations. FW logging via the Rx CQ can be
+ * enabled/disabled for individual PF's. However, FW logging via the UART can
+ * only be enabled/disabled for all PFs on the same device.
+ *
+ * To enable overall FW logging, the "cq_en" and "uart_en" enable bits in
+ * hw->fw_log need to be set accordingly, e.g. based on user-provided input,
+ * before initializing the device.
+ *
+ * When re/configuring FW logging, callers need to update the "cfg" elements of
+ * the hw->fw_log.evnts array with the desired logging event configurations for
+ * modules of interest. When disabling FW logging completely, the callers can
+ * just pass false in the "enable" parameter. On completion, the function will
+ * update the "cur" element of the hw->fw_log.evnts array with the resulting
+ * logging event configurations of the modules that are being re/configured. FW
+ * logging modules that are not part of a reconfiguration operation retain their
+ * previous states.
+ *
+ * Before resetting the device, it is recommended that the driver disable FW
+ * logging prior to shutting down the control queue. When disabling FW logging
+ * ("enable" = false), the latest configurations of FW logging events stored in
+ * hw->fw_log.evnts[] are not overridden, so that they can be reconfigured
+ * after a device reset.
+ *
+ * When enabling FW logging to emit log messages via the Rx CQ during the
+ * device's initialization phase, an alternative to interrupt handlers needs
+ * to be used to extract FW log messages from the Rx CQ periodically and to
+ * prevent the Rx CQ from filling up and stalling other types of control
+ * messages from FW to SW. Interrupts are typically disabled during the
+ * device's initialization phase.
+ */
+static int ice_cfg_fw_log(struct ice_hw *hw, bool enable)
+{
+ struct ice_aqc_fw_logging *cmd;
+ u16 i, chgs = 0, len = 0;
+ struct ice_aq_desc desc;
+ __le16 *data = NULL;
+ u8 actv_evnts = 0;
+ void *buf = NULL;
+ int status = 0;
+
+ if (!hw->fw_log.cq_en && !hw->fw_log.uart_en)
+ return 0;
+
+ /* Disable FW logging only when the control queue is still responsive */
+ if (!enable &&
+ (!hw->fw_log.actv_evnts || !ice_check_sq_alive(hw, &hw->adminq)))
+ return 0;
+
+ /* Get current FW log settings */
+ status = ice_get_fw_log_cfg(hw);
+ if (status)
+ return status;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging);
+ cmd = &desc.params.fw_logging;
+
+ /* Indicate which controls are valid */
+ if (hw->fw_log.cq_en)
+ cmd->log_ctrl_valid |= ICE_AQC_FW_LOG_AQ_VALID;
+
+ if (hw->fw_log.uart_en)
+ cmd->log_ctrl_valid |= ICE_AQC_FW_LOG_UART_VALID;
+
+ if (enable) {
+ /* Fill in an array of entries with FW logging modules and
+ * logging events being reconfigured.
+ */
+ for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) {
+ u16 val;
+
+ /* Keep track of enabled event types */
+ actv_evnts |= hw->fw_log.evnts[i].cfg;
+
+ if (hw->fw_log.evnts[i].cfg == hw->fw_log.evnts[i].cur)
+ continue;
+
+ if (!data) {
+ data = devm_kcalloc(ice_hw_to_dev(hw),
+ ICE_AQC_FW_LOG_ID_MAX,
+ sizeof(*data),
+ GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ }
+
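+			/* pack module ID and event flags into one LE16 entry */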
+ val = i << ICE_AQC_FW_LOG_ID_S;
+ val |= hw->fw_log.evnts[i].cfg << ICE_AQC_FW_LOG_EN_S;
+ data[chgs++] = cpu_to_le16(val);
+ }
+
+		/* Only enable FW logging if at least one module is specified.
+		 * If FW logging is currently enabled but no module is enabled
+		 * to emit log messages, disable FW logging altogether.
+		 */
+ if (actv_evnts) {
+ /* Leave if there is effectively no change */
+ if (!chgs)
+ goto out;
+
+ if (hw->fw_log.cq_en)
+ cmd->log_ctrl |= ICE_AQC_FW_LOG_AQ_EN;
+
+ if (hw->fw_log.uart_en)
+ cmd->log_ctrl |= ICE_AQC_FW_LOG_UART_EN;
+
+ buf = data;
+ len = sizeof(*data) * chgs;
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ }
+ }
+
+ status = ice_aq_send_cmd(hw, &desc, buf, len, NULL);
+ if (!status) {
+ /* Update the current configuration to reflect events enabled.
+ * hw->fw_log.cq_en and hw->fw_log.uart_en indicate if the FW
+ * logging mode is enabled for the device. They do not reflect
+ * actual modules being enabled to emit log messages. So, their
+ * values remain unchanged even when all modules are disabled.
+ */
+ u16 cnt = enable ? chgs : (u16)ICE_AQC_FW_LOG_ID_MAX;
+
+ hw->fw_log.actv_evnts = actv_evnts;
+ for (i = 0; i < cnt; i++) {
+ u16 v, m;
+
+ if (!enable) {
+ /* When disabling all FW logging events as part
+ * of device's de-initialization, the original
+ * configurations are retained, and can be used
+ * to reconfigure FW logging later if the device
+ * is re-initialized.
+ */
+ hw->fw_log.evnts[i].cur = 0;
+ continue;
+ }
+
+ v = le16_to_cpu(data[i]);
+ m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S;
+ hw->fw_log.evnts[m].cur = hw->fw_log.evnts[m].cfg;
+ }
+ }
+
+out:
+ if (data)
+ devm_kfree(ice_hw_to_dev(hw), data);
+
+ return status;
+}
+
+/**
+ * ice_output_fw_log
+ * @hw: pointer to the HW struct
+ * @desc: pointer to the AQ message descriptor
+ * @buf: pointer to the buffer accompanying the AQ message
+ *
+ * Formats a FW Log message and outputs it via the standard driver logs.
+ */
+void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf)
+{
+ ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg Start ]\n");
+ ice_debug_array(hw, ICE_DBG_FW_LOG, 16, 1, (u8 *)buf,
+ le16_to_cpu(desc->datalen));
+ ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg End ]\n");
+}
+
+/**
+ * ice_get_itr_intrl_gran
+ * @hw: pointer to the HW struct
+ *
+ * Determines the ITR/INTRL granularities based on the maximum aggregate
+ * bandwidth according to the device's configuration during power-on.
+ */
+static void ice_get_itr_intrl_gran(struct ice_hw *hw)
+{
+ u8 max_agg_bw = (rd32(hw, GL_PWR_MODE_CTL) &
+ GL_PWR_MODE_CTL_CAR_MAX_BW_M) >>
+ GL_PWR_MODE_CTL_CAR_MAX_BW_S;
+
+ switch (max_agg_bw) {
+ case ICE_MAX_AGG_BW_200G:
+ case ICE_MAX_AGG_BW_100G:
+ case ICE_MAX_AGG_BW_50G:
+ hw->itr_gran = ICE_ITR_GRAN_ABOVE_25;
+ hw->intrl_gran = ICE_INTRL_GRAN_ABOVE_25;
+ break;
+ case ICE_MAX_AGG_BW_25G:
+ hw->itr_gran = ICE_ITR_GRAN_MAX_25;
+ hw->intrl_gran = ICE_INTRL_GRAN_MAX_25;
+ break;
+ }
+}
+
+/**
+ * ice_init_hw - main hardware initialization routine
+ * @hw: pointer to the hardware structure
+ */
+int ice_init_hw(struct ice_hw *hw)
+{
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ u16 mac_buf_len;
+ void *mac_buf;
+ int status;
+
+ /* Set MAC type based on DeviceID */
+ status = ice_set_mac_type(hw);
+ if (status)
+ return status;
+
+ hw->pf_id = (u8)(rd32(hw, PF_FUNC_RID) &
+ PF_FUNC_RID_FUNC_NUM_M) >>
+ PF_FUNC_RID_FUNC_NUM_S;
+
+ status = ice_reset(hw, ICE_RESET_PFR);
+ if (status)
+ return status;
+
+ ice_get_itr_intrl_gran(hw);
+
+ status = ice_create_all_ctrlq(hw);
+ if (status)
+ goto err_unroll_cqinit;
+
+ /* Enable FW logging. Not fatal if this fails. */
+ status = ice_cfg_fw_log(hw, true);
+ if (status)
+ ice_debug(hw, ICE_DBG_INIT, "Failed to enable FW logging.\n");
+
+ status = ice_clear_pf_cfg(hw);
+ if (status)
+ goto err_unroll_cqinit;
+
+ /* Set bit to enable Flow Director filters */
+ wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
+ INIT_LIST_HEAD(&hw->fdir_list_head);
+
+ ice_clear_pxe_mode(hw);
+
+ status = ice_init_nvm(hw);
+ if (status)
+ goto err_unroll_cqinit;
+
+ status = ice_get_caps(hw);
+ if (status)
+ goto err_unroll_cqinit;
+
+ hw->port_info = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*hw->port_info), GFP_KERNEL);
+ if (!hw->port_info) {
+ status = -ENOMEM;
+ goto err_unroll_cqinit;
+ }
+
+ /* set the back pointer to HW */
+ hw->port_info->hw = hw;
+
+ /* Initialize port_info struct with switch configuration data */
+ status = ice_get_initial_sw_cfg(hw);
+ if (status)
+ goto err_unroll_alloc;
+
+ hw->evb_veb = true;
+
+ /* Query the allocated resources for Tx scheduler */
+ status = ice_sched_query_res_alloc(hw);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SCHED, "Failed to get scheduler allocated resources\n");
+ goto err_unroll_alloc;
+ }
+ ice_sched_get_psm_clk_freq(hw);
+
+ /* Initialize port_info struct with scheduler data */
+ status = ice_sched_init_port(hw->port_info);
+ if (status)
+ goto err_unroll_sched;
+
+ pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps) {
+ status = -ENOMEM;
+ goto err_unroll_sched;
+ }
+
+ /* Initialize port_info struct with PHY capabilities */
+ status = ice_aq_get_phy_caps(hw->port_info, false,
+ ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
+ NULL);
+ devm_kfree(ice_hw_to_dev(hw), pcaps);
+ if (status)
+ dev_warn(ice_hw_to_dev(hw), "Get PHY capabilities failed status = %d, continuing anyway\n",
+ status);
+
+ /* Initialize port_info struct with link information */
+ status = ice_aq_get_link_info(hw->port_info, false, NULL, NULL);
+ if (status)
+ goto err_unroll_sched;
+
+ /* need a valid SW entry point to build a Tx tree */
+ if (!hw->sw_entry_point_layer) {
+ ice_debug(hw, ICE_DBG_SCHED, "invalid sw entry point\n");
+ status = -EIO;
+ goto err_unroll_sched;
+ }
+ INIT_LIST_HEAD(&hw->agg_list);
+ /* Initialize max burst size */
+ if (!hw->max_burst_size)
+ ice_cfg_rl_burst_size(hw, ICE_SCHED_DFLT_BURST_SIZE);
+
+ status = ice_init_fltr_mgmt_struct(hw);
+ if (status)
+ goto err_unroll_sched;
+
+ /* Get MAC information */
+ /* A single port can report up to two (LAN and WoL) addresses */
+ mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2,
+ sizeof(struct ice_aqc_manage_mac_read_resp),
+ GFP_KERNEL);
+ mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp);
+
+ if (!mac_buf) {
+ status = -ENOMEM;
+ goto err_unroll_fltr_mgmt_struct;
+ }
+
+ status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
+ devm_kfree(ice_hw_to_dev(hw), mac_buf);
+
+ if (status)
+ goto err_unroll_fltr_mgmt_struct;
+ /* enable jumbo frame support at MAC level */
+ status = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
+ if (status)
+ goto err_unroll_fltr_mgmt_struct;
+ /* Obtain counter base index which would be used by flow director */
+ status = ice_alloc_fd_res_cntr(hw, &hw->fd_ctr_base);
+ if (status)
+ goto err_unroll_fltr_mgmt_struct;
+ status = ice_init_hw_tbls(hw);
+ if (status)
+ goto err_unroll_fltr_mgmt_struct;
+ mutex_init(&hw->tnl_lock);
+ return 0;
+
+err_unroll_fltr_mgmt_struct:
+ ice_cleanup_fltr_mgmt_struct(hw);
+err_unroll_sched:
+ ice_sched_cleanup_all(hw);
+err_unroll_alloc:
+ devm_kfree(ice_hw_to_dev(hw), hw->port_info);
+err_unroll_cqinit:
+ ice_destroy_all_ctrlq(hw);
+ return status;
+}
+
+/**
+ * ice_deinit_hw - unroll initialization operations done by ice_init_hw
+ * @hw: pointer to the hardware structure
+ *
+ * This should be called only during nominal operation, not as a result of
+ * ice_init_hw() failing, since ice_init_hw() itself takes care of unrolling
+ * the applicable initializations if it fails for any reason.
+ */
+void ice_deinit_hw(struct ice_hw *hw)
+{
+ ice_free_fd_res_cntr(hw, hw->fd_ctr_base);
+ ice_cleanup_fltr_mgmt_struct(hw);
+
+ ice_sched_cleanup_all(hw);
+ ice_sched_clear_agg(hw);
+ ice_free_seg(hw);
+ ice_free_hw_tbls(hw);
+ mutex_destroy(&hw->tnl_lock);
+
+ if (hw->port_info) {
+ devm_kfree(ice_hw_to_dev(hw), hw->port_info);
+ hw->port_info = NULL;
+ }
+
+ /* Attempt to disable FW logging before shutting down control queues */
+ ice_cfg_fw_log(hw, false);
+ ice_destroy_all_ctrlq(hw);
+
+ /* Clear VSI contexts if not already cleared */
+ ice_clear_all_vsi_ctx(hw);
+}
+
+/**
+ * ice_check_reset - Check to see if a global reset is complete
+ * @hw: pointer to the hardware structure
+ */
+int ice_check_reset(struct ice_hw *hw)
+{
+ u32 cnt, reg = 0, grst_timeout, uld_mask;
+
+ /* Poll for Device Active state in case a recent CORER, GLOBR,
+ * or EMPR has occurred. The grst delay value is in 100ms units.
+ * Add 1sec for outstanding AQ commands that can take a long time.
+ */
+ grst_timeout = ((rd32(hw, GLGEN_RSTCTL) & GLGEN_RSTCTL_GRSTDEL_M) >>
+ GLGEN_RSTCTL_GRSTDEL_S) + 10;
+
+ for (cnt = 0; cnt < grst_timeout; cnt++) {
+ mdelay(100);
+ reg = rd32(hw, GLGEN_RSTAT);
+ if (!(reg & GLGEN_RSTAT_DEVSTATE_M))
+ break;
+ }
+
+ if (cnt == grst_timeout) {
+ ice_debug(hw, ICE_DBG_INIT, "Global reset polling failed to complete.\n");
+ return -EIO;
+ }
+
+#define ICE_RESET_DONE_MASK (GLNVM_ULD_PCIER_DONE_M |\
+ GLNVM_ULD_PCIER_DONE_1_M |\
+ GLNVM_ULD_CORER_DONE_M |\
+ GLNVM_ULD_GLOBR_DONE_M |\
+ GLNVM_ULD_POR_DONE_M |\
+ GLNVM_ULD_POR_DONE_1_M |\
+ GLNVM_ULD_PCIER_DONE_2_M)
+
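+	/* also wait on the PE (RDMA protocol engine) when RDMA is supported */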
+ uld_mask = ICE_RESET_DONE_MASK | (hw->func_caps.common_cap.rdma ?
+ GLNVM_ULD_PE_DONE_M : 0);
+
+ /* Device is Active; check Global Reset processes are done */
+ for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
+ reg = rd32(hw, GLNVM_ULD) & uld_mask;
+ if (reg == uld_mask) {
+ ice_debug(hw, ICE_DBG_INIT, "Global reset processes done. %d\n", cnt);
+ break;
+ }
+ mdelay(10);
+ }
+
+ if (cnt == ICE_PF_RESET_WAIT_COUNT) {
+ ice_debug(hw, ICE_DBG_INIT, "Wait for Reset Done timed out. GLNVM_ULD = 0x%x\n",
+ reg);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_pf_reset - Reset the PF
+ * @hw: pointer to the hardware structure
+ *
+ * If a global reset has been triggered, this function checks
+ * for its completion and then issues the PF reset
+ */
+static int ice_pf_reset(struct ice_hw *hw)
+{
+ u32 cnt, reg;
+
+ /* If at function entry a global reset was already in progress, i.e.
+ * state is not 'device active' or any of the reset done bits are not
+ * set in GLNVM_ULD, there is no need for a PF Reset; poll until the
+ * global reset is done.
+ */
+ if ((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) ||
+ (rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK) ^ ICE_RESET_DONE_MASK) {
+ /* poll on global reset currently in progress until done */
+ if (ice_check_reset(hw))
+ return -EIO;
+
+ return 0;
+ }
+
+ /* Reset the PF */
+ reg = rd32(hw, PFGEN_CTRL);
+
+ wr32(hw, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR_M));
+
+	/* Wait for the PFR to complete. The wait time is the global config
+	 * lock timeout plus the PFR timeout, which accounts for a possible
+	 * reset occurring during a download package operation.
+	 */
+ for (cnt = 0; cnt < ICE_GLOBAL_CFG_LOCK_TIMEOUT +
+ ICE_PF_RESET_WAIT_COUNT; cnt++) {
+ reg = rd32(hw, PFGEN_CTRL);
+ if (!(reg & PFGEN_CTRL_PFSWR_M))
+ break;
+
+ mdelay(1);
+ }
+
+	if (cnt == ICE_GLOBAL_CFG_LOCK_TIMEOUT + ICE_PF_RESET_WAIT_COUNT) {
+ ice_debug(hw, ICE_DBG_INIT, "PF reset polling failed to complete.\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_reset - Perform different types of reset
+ * @hw: pointer to the hardware structure
+ * @req: reset request
+ *
+ * This function triggers a reset as specified by the req parameter.
+ *
+ * Note:
+ * If anything other than a PF reset is triggered, PXE mode is restored.
+ * This has to be cleared using ice_clear_pxe_mode again, once the AQ
+ * interface has been restored in the rebuild flow.
+ */
+int ice_reset(struct ice_hw *hw, enum ice_reset_req req)
+{
+ u32 val = 0;
+
+ switch (req) {
+ case ICE_RESET_PFR:
+ return ice_pf_reset(hw);
+ case ICE_RESET_CORER:
+ ice_debug(hw, ICE_DBG_INIT, "CoreR requested\n");
+ val = GLGEN_RTRIG_CORER_M;
+ break;
+ case ICE_RESET_GLOBR:
+ ice_debug(hw, ICE_DBG_INIT, "GlobalR requested\n");
+ val = GLGEN_RTRIG_GLOBR_M;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ val |= rd32(hw, GLGEN_RTRIG);
+ wr32(hw, GLGEN_RTRIG, val);
+ ice_flush(hw);
+
+ /* wait for the FW to be ready */
+ return ice_check_reset(hw);
+}
+
+/**
+ * ice_copy_rxq_ctx_to_hw
+ * @hw: pointer to the hardware structure
+ * @ice_rxq_ctx: pointer to the rxq context
+ * @rxq_index: the index of the Rx queue
+ *
+ * Copies rxq context from dense structure to HW register space
+ */
+static int
+ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index)
+{
+ u8 i;
+
+ if (!ice_rxq_ctx)
+ return -EINVAL;
+
+ if (rxq_index > QRX_CTRL_MAX_INDEX)
+ return -EINVAL;
+
+ /* Copy each dword separately to HW */
+ for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) {
+ wr32(hw, QRX_CONTEXT(i, rxq_index),
+ *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+
+ ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i,
+ *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+ }
+
+ return 0;
+}
+
+/* LAN Rx Queue Context */
+static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
+ /* Field Width LSB */
+ ICE_CTX_STORE(ice_rlan_ctx, head, 13, 0),
+ ICE_CTX_STORE(ice_rlan_ctx, cpuid, 8, 13),
+ ICE_CTX_STORE(ice_rlan_ctx, base, 57, 32),
+ ICE_CTX_STORE(ice_rlan_ctx, qlen, 13, 89),
+ ICE_CTX_STORE(ice_rlan_ctx, dbuf, 7, 102),
+ ICE_CTX_STORE(ice_rlan_ctx, hbuf, 5, 109),
+ ICE_CTX_STORE(ice_rlan_ctx, dtype, 2, 114),
+ ICE_CTX_STORE(ice_rlan_ctx, dsize, 1, 116),
+ ICE_CTX_STORE(ice_rlan_ctx, crcstrip, 1, 117),
+ ICE_CTX_STORE(ice_rlan_ctx, l2tsel, 1, 119),
+ ICE_CTX_STORE(ice_rlan_ctx, hsplit_0, 4, 120),
+ ICE_CTX_STORE(ice_rlan_ctx, hsplit_1, 2, 124),
+ ICE_CTX_STORE(ice_rlan_ctx, showiv, 1, 127),
+ ICE_CTX_STORE(ice_rlan_ctx, rxmax, 14, 174),
+ ICE_CTX_STORE(ice_rlan_ctx, tphrdesc_ena, 1, 193),
+ ICE_CTX_STORE(ice_rlan_ctx, tphwdesc_ena, 1, 194),
+ ICE_CTX_STORE(ice_rlan_ctx, tphdata_ena, 1, 195),
+ ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena, 1, 196),
+ ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh, 3, 198),
+ ICE_CTX_STORE(ice_rlan_ctx, prefena, 1, 201),
+ { 0 }
+};
+
+/**
+ * ice_write_rxq_ctx
+ * @hw: pointer to the hardware structure
+ * @rlan_ctx: pointer to the rxq context
+ * @rxq_index: the index of the Rx queue
+ *
+ * Converts the rxq context from the sparse to the dense structure, writes it
+ * to HW register space, and enables the hardware to prefetch descriptors
+ * instead of fetching them only on demand.
+ */
+int
+ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+ u32 rxq_index)
+{
+ u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 };
+
+ if (!rlan_ctx)
+ return -EINVAL;
+
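+	/* always enable descriptor prefetch for the Rx queue */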
+ rlan_ctx->prefena = 1;
+
+ ice_set_ctx(hw, (u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info);
+ return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index);
+}
+
+/* LAN Tx Queue Context */
+const struct ice_ctx_ele ice_tlan_ctx_info[] = {
+ /* Field Width LSB */
+ ICE_CTX_STORE(ice_tlan_ctx, base, 57, 0),
+ ICE_CTX_STORE(ice_tlan_ctx, port_num, 3, 57),
+ ICE_CTX_STORE(ice_tlan_ctx, cgd_num, 5, 60),
+ ICE_CTX_STORE(ice_tlan_ctx, pf_num, 3, 65),
+ ICE_CTX_STORE(ice_tlan_ctx, vmvf_num, 10, 68),
+ ICE_CTX_STORE(ice_tlan_ctx, vmvf_type, 2, 78),
+ ICE_CTX_STORE(ice_tlan_ctx, src_vsi, 10, 80),
+ ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena, 1, 90),
+ ICE_CTX_STORE(ice_tlan_ctx, internal_usage_flag, 1, 91),
+ ICE_CTX_STORE(ice_tlan_ctx, alt_vlan, 1, 92),
+ ICE_CTX_STORE(ice_tlan_ctx, cpuid, 8, 93),
+ ICE_CTX_STORE(ice_tlan_ctx, wb_mode, 1, 101),
+ ICE_CTX_STORE(ice_tlan_ctx, tphrd_desc, 1, 102),
+ ICE_CTX_STORE(ice_tlan_ctx, tphrd, 1, 103),
+ ICE_CTX_STORE(ice_tlan_ctx, tphwr_desc, 1, 104),
+ ICE_CTX_STORE(ice_tlan_ctx, cmpq_id, 9, 105),
+ ICE_CTX_STORE(ice_tlan_ctx, qnum_in_func, 14, 114),
+ ICE_CTX_STORE(ice_tlan_ctx, itr_notification_mode, 1, 128),
+ ICE_CTX_STORE(ice_tlan_ctx, adjust_prof_id, 6, 129),
+ ICE_CTX_STORE(ice_tlan_ctx, qlen, 13, 135),
+ ICE_CTX_STORE(ice_tlan_ctx, quanta_prof_idx, 4, 148),
+ ICE_CTX_STORE(ice_tlan_ctx, tso_ena, 1, 152),
+ ICE_CTX_STORE(ice_tlan_ctx, tso_qnum, 11, 153),
+ ICE_CTX_STORE(ice_tlan_ctx, legacy_int, 1, 164),
+ ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165),
+ ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166),
+ ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168),
+ ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 122, 171),
+ { 0 }
+};
+
+/* Sideband Queue command wrappers */
+
+/**
+ * ice_sbq_send_cmd - send Sideband Queue command to Sideband Queue
+ * @hw: pointer to the HW struct
+ * @desc: descriptor describing the command
+ * @buf: buffer to use for indirect commands (NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (0 for direct commands)
+ * @cd: pointer to command details structure
+ */
+static int
+ice_sbq_send_cmd(struct ice_hw *hw, struct ice_sbq_cmd_desc *desc,
+ void *buf, u16 buf_size, struct ice_sq_cd *cd)
+{
+ return ice_sq_send_cmd(hw, ice_get_sbq(hw),
+ (struct ice_aq_desc *)desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_sbq_rw_reg - Fill Sideband Queue command
+ * @hw: pointer to the HW struct
+ * @in: message info to be filled in descriptor
+ */
+int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in)
+{
+ struct ice_sbq_cmd_desc desc = {0};
+ struct ice_sbq_msg_req msg = {0};
+ u16 msg_len;
+ int status;
+
+ msg_len = sizeof(msg);
+
+ msg.dest_dev = in->dest_dev;
+ msg.opcode = in->opcode;
+ msg.flags = ICE_SBQ_MSG_FLAGS;
+ msg.sbe_fbe = ICE_SBQ_MSG_SBE_FBE;
+ msg.msg_addr_low = cpu_to_le16(in->msg_addr_low);
+ msg.msg_addr_high = cpu_to_le32(in->msg_addr_high);
+
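+	/* non-zero opcode means a write request; only writes carry data */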
+ if (in->opcode)
+ msg.data = cpu_to_le32(in->data);
+ else
+ /* data read comes back in completion, so shorten the struct by
+ * sizeof(msg.data)
+ */
+ msg_len -= sizeof(msg.data);
+
+ desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD);
+ desc.opcode = cpu_to_le16(ice_sbq_opc_neigh_dev_req);
+ desc.param0.cmd_len = cpu_to_le16(msg_len);
+ status = ice_sbq_send_cmd(hw, &desc, &msg, msg_len, NULL);
+ if (!status && !in->opcode)
+		in->data =
+			le32_to_cpu(((struct ice_sbq_msg_cmpl *)&msg)->data);
+ return status;
+}
+
+/* FW Admin Queue command wrappers */
+
+/* Software lock/mutex that is meant to be held while the Global Config Lock
+ * in firmware is acquired by the software to prevent most (but not all) types
+ * of AQ commands from being sent to FW
+ */
+DEFINE_MUTEX(ice_global_cfg_lock_sw);
+
+/**
+ * ice_should_retry_sq_send_cmd
+ * @opcode: AQ opcode
+ *
+ * Decide if we should retry the send command routine for the ATQ, depending
+ * on the opcode.
+ */
+static bool ice_should_retry_sq_send_cmd(u16 opcode)
+{
+ switch (opcode) {
+ case ice_aqc_opc_get_link_topo:
+ case ice_aqc_opc_lldp_stop:
+ case ice_aqc_opc_lldp_start:
+ case ice_aqc_opc_lldp_filter_ctrl:
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_sq_send_cmd_retry - send command to Control Queue (ATQ)
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ * @desc: prefilled descriptor describing the command
+ * @buf: buffer to use for indirect commands (or NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * Retry sending the FW Admin Queue command multiple times if the EBUSY AQ
+ * error is returned.
+ */
+static int
+ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+ struct ice_aq_desc *desc, void *buf, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc_cpy;
+ bool is_cmd_for_retry;
+ u8 *buf_cpy = NULL;
+ u8 idx = 0;
+ u16 opcode;
+ int status;
+
+ opcode = le16_to_cpu(desc->opcode);
+ is_cmd_for_retry = ice_should_retry_sq_send_cmd(opcode);
+ memset(&desc_cpy, 0, sizeof(desc_cpy));
+
+ if (is_cmd_for_retry) {
+ if (buf) {
+ buf_cpy = kzalloc(buf_size, GFP_KERNEL);
+ if (!buf_cpy)
+ return -ENOMEM;
+ }
+
+ memcpy(&desc_cpy, desc, sizeof(desc_cpy));
+ }
+
+ do {
+ status = ice_sq_send_cmd(hw, cq, desc, buf, buf_size, cd);
+
+ if (!is_cmd_for_retry || !status ||
+ hw->adminq.sq_last_status != ICE_AQ_RC_EBUSY)
+ break;
+
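+		/* the failed attempt may have overwritten the descriptor and
+		 * buffer with completion data, so restore the originals
+		 */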
+ if (buf_cpy)
+ memcpy(buf, buf_cpy, buf_size);
+
+ memcpy(desc, &desc_cpy, sizeof(desc_cpy));
+
+ mdelay(ICE_SQ_SEND_DELAY_TIME_MS);
+
+ } while (++idx < ICE_SQ_SEND_MAX_EXECUTE);
+
+ kfree(buf_cpy);
+
+ return status;
+}
+
+/**
+ * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue
+ * @hw: pointer to the HW struct
+ * @desc: descriptor describing the command
+ * @buf: buffer to use for indirect commands (NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * Helper function to send FW Admin Queue commands to the FW Admin Queue.
+ */
+int
+ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
+ u16 buf_size, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_req_res *cmd = &desc->params.res_owner;
+ bool lock_acquired = false;
+ int status;
+
+ /* When a package download is in process (i.e. when the firmware's
+ * Global Configuration Lock resource is held), only the Download
+ * Package, Get Version, Get Package Info List, Upload Section,
+ * Update Package, Set Port Parameters, Get/Set VLAN Mode Parameters,
+ * Add Recipe, Set Recipes to Profile Association, Get Recipe, and Get
+ * Recipes to Profile Association, and Release Resource (with resource
+ * ID set to Global Config Lock) AdminQ commands are allowed; all others
+ * must block until the package download completes and the Global Config
+ * Lock is released. See also ice_acquire_global_cfg_lock().
+ */
+ switch (le16_to_cpu(desc->opcode)) {
+ case ice_aqc_opc_download_pkg:
+ case ice_aqc_opc_get_pkg_info_list:
+ case ice_aqc_opc_get_ver:
+ case ice_aqc_opc_upload_section:
+ case ice_aqc_opc_update_pkg:
+ case ice_aqc_opc_set_port_params:
+ case ice_aqc_opc_get_vlan_mode_parameters:
+ case ice_aqc_opc_set_vlan_mode_parameters:
+ case ice_aqc_opc_add_recipe:
+ case ice_aqc_opc_recipe_to_profile:
+ case ice_aqc_opc_get_recipe:
+ case ice_aqc_opc_get_recipe_to_profile:
+ break;
+ case ice_aqc_opc_release_res:
+ if (le16_to_cpu(cmd->res_id) == ICE_AQC_RES_ID_GLBL_LOCK)
+ break;
+ fallthrough;
+ default:
+ mutex_lock(&ice_global_cfg_lock_sw);
+ lock_acquired = true;
+ break;
+ }
+
+ status = ice_sq_send_cmd_retry(hw, &hw->adminq, desc, buf, buf_size, cd);
+ if (lock_acquired)
+ mutex_unlock(&ice_global_cfg_lock_sw);
+
+ return status;
+}
+
+/**
+ * ice_aq_get_fw_ver
+ * @hw: pointer to the HW struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the firmware version (0x0001) from the admin queue commands
+ */
+int ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_ver *resp;
+ struct ice_aq_desc desc;
+ int status;
+
+ resp = &desc.params.get_ver;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_ver);
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+ if (!status) {
+ hw->fw_branch = resp->fw_branch;
+ hw->fw_maj_ver = resp->fw_major;
+ hw->fw_min_ver = resp->fw_minor;
+ hw->fw_patch = resp->fw_patch;
+ hw->fw_build = le32_to_cpu(resp->fw_build);
+ hw->api_branch = resp->api_branch;
+ hw->api_maj_ver = resp->api_major;
+ hw->api_min_ver = resp->api_minor;
+ hw->api_patch = resp->api_patch;
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_send_driver_ver
+ * @hw: pointer to the HW struct
+ * @dv: driver's major, minor version
+ * @cd: pointer to command details structure or NULL
+ *
+ * Send the driver version (0x0002) to the firmware
+ */
+int
+ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_driver_ver *cmd;
+ struct ice_aq_desc desc;
+ u16 len;
+
+ cmd = &desc.params.driver_ver;
+
+ if (!dv)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_ver);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ cmd->major_ver = dv->major_ver;
+ cmd->minor_ver = dv->minor_ver;
+ cmd->build_ver = dv->build_ver;
+ cmd->subbuild_ver = dv->subbuild_ver;
+
+ len = 0;
+ while (len < sizeof(dv->driver_string) &&
+ isascii(dv->driver_string[len]) && dv->driver_string[len])
+ len++;
+
+ return ice_aq_send_cmd(hw, &desc, dv->driver_string, len, cd);
+}
+
+/**
+ * ice_aq_q_shutdown
+ * @hw: pointer to the HW struct
+ * @unloading: is the driver unloading itself
+ *
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well (0x0003).
+ */
+int ice_aq_q_shutdown(struct ice_hw *hw, bool unloading)
+{
+ struct ice_aqc_q_shutdown *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.q_shutdown;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown);
+
+ if (unloading)
+ cmd->driver_unloading = ICE_AQC_DRIVER_UNLOADING;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_req_res
+ * @hw: pointer to the HW struct
+ * @res: resource ID
+ * @access: access type
+ * @sdp_number: resource number
+ * @timeout: the maximum time in ms that the driver may hold the resource
+ * @cd: pointer to command details structure or NULL
+ *
+ * Requests common resource using the admin queue commands (0x0008).
+ * When attempting to acquire the Global Config Lock, the driver can
+ * learn of three states:
+ * 1) 0 - acquired lock, and can perform download package
+ * 2) -EIO - did not get lock, driver should fail to load
+ * 3) -EALREADY - did not get lock, but another driver has
+ * successfully downloaded the package; the driver does
+ * not have to download the package and can continue
+ * loading
+ *
+ * Note that if the caller is in an acquire-lock, perform-action, release-lock
+ * phase of operation, it is possible that the FW may detect a timeout and
+ * issue a CORER. In this case, the driver will receive a CORER interrupt and
+ * will have to determine its cause. The calling thread that is handling this
+ * flow will likely get an error propagated back to it indicating that the
+ * Download Package, Update Package or Release Resource AQ command timed out.
+ */
+static int
+ice_aq_req_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+ enum ice_aq_res_access_type access, u8 sdp_number, u32 *timeout,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_req_res *cmd_resp;
+ struct ice_aq_desc desc;
+ int status;
+
+ cmd_resp = &desc.params.res_owner;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_req_res);
+
+ cmd_resp->res_id = cpu_to_le16(res);
+ cmd_resp->access_type = cpu_to_le16(access);
+ cmd_resp->res_number = cpu_to_le32(sdp_number);
+ cmd_resp->timeout = cpu_to_le32(*timeout);
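+	/* clear the caller's timeout; it is repopulated from the completion */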
+ *timeout = 0;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+ /* The completion specifies the maximum time in ms that the driver
+ * may hold the resource in the Timeout field.
+ */
+
+ /* Global config lock response utilizes an additional status field.
+ *
+ * If the Global config lock resource is held by some other driver, the
+ * command completes with ICE_AQ_RES_GLBL_IN_PROG in the status field
+ * and the timeout field indicates the maximum time the current owner
+ * of the resource has to free it.
+ */
+ if (res == ICE_GLOBAL_CFG_LOCK_RES_ID) {
+ if (le16_to_cpu(cmd_resp->status) == ICE_AQ_RES_GLBL_SUCCESS) {
+ *timeout = le32_to_cpu(cmd_resp->timeout);
+ return 0;
+ } else if (le16_to_cpu(cmd_resp->status) ==
+ ICE_AQ_RES_GLBL_IN_PROG) {
+ *timeout = le32_to_cpu(cmd_resp->timeout);
+ return -EIO;
+ } else if (le16_to_cpu(cmd_resp->status) ==
+ ICE_AQ_RES_GLBL_DONE) {
+ return -EALREADY;
+ }
+
+ /* invalid FW response, force a timeout immediately */
+ *timeout = 0;
+ return -EIO;
+ }
+
+ /* If the resource is held by some other driver, the command completes
+ * with a busy return value and the timeout field indicates the maximum
+ * time the current owner of the resource has to free it.
+ */
+ if (!status || hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)
+ *timeout = le32_to_cpu(cmd_resp->timeout);
+
+ return status;
+}
+
+/**
+ * ice_aq_release_res
+ * @hw: pointer to the HW struct
+ * @res: resource ID
+ * @sdp_number: resource number
+ * @cd: pointer to command details structure or NULL
+ *
+ * Release a common resource using the admin queue commands (0x0009)
+ */
+static int
+ice_aq_release_res(struct ice_hw *hw, enum ice_aq_res_ids res, u8 sdp_number,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_req_res *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.res_owner;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_release_res);
+
+ cmd->res_id = cpu_to_le16(res);
+ cmd->res_number = cpu_to_le32(sdp_number);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_acquire_res
+ * @hw: pointer to the HW structure
+ * @res: resource ID
+ * @access: access type (read or write)
+ * @timeout: timeout in milliseconds
+ *
+ * This function will attempt to acquire the ownership of a resource.
+ */
+int
+ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+ enum ice_aq_res_access_type access, u32 timeout)
+{
+#define ICE_RES_POLLING_DELAY_MS 10
+ u32 delay = ICE_RES_POLLING_DELAY_MS;
+ u32 time_left = timeout;
+ int status;
+
+ status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
+
+ /* A return code of -EALREADY means that another driver has
+ * previously acquired the resource and performed any necessary updates;
+ * in this case the caller does not obtain the resource and has no
+ * further work to do.
+ */
+ if (status == -EALREADY)
+ goto ice_acquire_res_exit;
+
+ if (status)
+ ice_debug(hw, ICE_DBG_RES, "resource %d acquire type %d failed.\n", res, access);
+
+	/* If necessary, poll until the current lock owner times out */
+ timeout = time_left;
+ while (status && timeout && time_left) {
+ mdelay(delay);
+ timeout = (timeout > delay) ? timeout - delay : 0;
+ status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
+
+ if (status == -EALREADY)
+ /* lock free, but no work to do */
+ break;
+
+ if (!status)
+ /* lock acquired */
+ break;
+ }
+ if (status && status != -EALREADY)
+ ice_debug(hw, ICE_DBG_RES, "resource acquire timed out.\n");
+
+ice_acquire_res_exit:
+ if (status == -EALREADY) {
+ if (access == ICE_RES_WRITE)
+ ice_debug(hw, ICE_DBG_RES, "resource indicates no work to do.\n");
+ else
+ ice_debug(hw, ICE_DBG_RES, "Warning: -EALREADY not expected\n");
+ }
+ return status;
+}
+
+/**
+ * ice_release_res
+ * @hw: pointer to the HW structure
+ * @res: resource ID
+ *
+ * This function will release a resource using the proper Admin Command.
+ */
+void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res)
+{
+ u32 total_delay = 0;
+ int status;
+
+ status = ice_aq_release_res(hw, res, 0, NULL);
+
+ /* there are some rare cases when trying to release the resource
+ * results in an admin queue timeout, so handle them correctly
+ */
+ while ((status == -EIO) && (total_delay < hw->adminq.sq_cmd_timeout)) {
+ mdelay(1);
+ status = ice_aq_release_res(hw, res, 0, NULL);
+ total_delay++;
+ }
+}
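+
+/* Usage sketch (illustrative only, not upstream code): callers typically
+ * bracket access to a shared resource with acquire/release. The resource
+ * ID, access type, and 3000 ms timeout below are example values, and
+ * do_protected_work() is a hypothetical helper:
+ *
+ *	if (!ice_acquire_res(hw, ICE_NVM_RES_ID, ICE_RES_READ, 3000)) {
+ *		do_protected_work(hw);
+ *		ice_release_res(hw, ICE_NVM_RES_ID);
+ *	}
+ */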
+
+/**
+ * ice_aq_alloc_free_res - command to allocate/free resources
+ * @hw: pointer to the HW struct
+ * @num_entries: number of resource entries in buffer
+ * @buf: Indirect buffer to hold data parameters and response
+ * @buf_size: size of buffer for indirect commands
+ * @opc: pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Helper function to allocate/free resources using the admin queue commands
+ */
+int
+ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries,
+ struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size,
+ enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_alloc_free_res_cmd *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.sw_res_ctrl;
+
+ if (!buf)
+ return -EINVAL;
+
+ if (buf_size < flex_array_size(buf, elem, num_entries))
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ cmd->num_entries = cpu_to_le16(num_entries);
+
+ return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_alloc_hw_res - allocate resource
+ * @hw: pointer to the HW struct
+ * @type: type of resource
+ * @num: number of resources to allocate
+ * @btm: allocate from bottom
+ * @res: pointer to array that will receive the resources
+ */
+int
+ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res)
+{
+ struct ice_aqc_alloc_free_res_elem *buf;
+ u16 buf_len;
+ int status;
+
+ buf_len = struct_size(buf, elem, num);
+ buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Prepare buffer to allocate resource. */
+ buf->num_elems = cpu_to_le16(num);
+ buf->res_type = cpu_to_le16(type | ICE_AQC_RES_TYPE_FLAG_DEDICATED |
+ ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX);
+ if (btm)
+ buf->res_type |= cpu_to_le16(ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM);
+
+ status = ice_aq_alloc_free_res(hw, 1, buf, buf_len,
+ ice_aqc_opc_alloc_res, NULL);
+ if (status)
+ goto ice_alloc_res_exit;
+
+ memcpy(res, buf->elem, sizeof(*buf->elem) * num);
+
+ice_alloc_res_exit:
+ kfree(buf);
+ return status;
+}
+
+/**
+ * ice_free_hw_res - free allocated HW resource
+ * @hw: pointer to the HW struct
+ * @type: type of resource to free
+ * @num: number of resources
+ * @res: pointer to array that contains the resources to free
+ */
+int ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res)
+{
+ struct ice_aqc_alloc_free_res_elem *buf;
+ u16 buf_len;
+ int status;
+
+ buf_len = struct_size(buf, elem, num);
+ buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Prepare buffer to free resource. */
+ buf->num_elems = cpu_to_le16(num);
+ buf->res_type = cpu_to_le16(type);
+ memcpy(buf->elem, res, sizeof(*buf->elem) * num);
+
+ status = ice_aq_alloc_free_res(hw, num, buf, buf_len,
+ ice_aqc_opc_free_res, NULL);
+ if (status)
+ ice_debug(hw, ICE_DBG_SW, "CQ CMD Buffer:\n");
+
+ kfree(buf);
+ return status;
+}
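+
+/* Usage sketch (illustrative only): allocate one dedicated resource of an
+ * example type and free it again; ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK is
+ * used purely as an example resource type here:
+ *
+ *	u16 res_id;
+ *
+ *	if (!ice_alloc_hw_res(hw, ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK, 1,
+ *			      false, &res_id))
+ *		ice_free_hw_res(hw, ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK, 1,
+ *				&res_id);
+ */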
+
+/**
+ * ice_get_num_per_func - determine number of resources per PF
+ * @hw: pointer to the HW structure
+ * @max: value to be evenly split between each PF
+ *
+ * Determine the number of valid functions by going through the bitmap returned
+ * from parsing capabilities and use this to calculate the number of resources
+ * per PF based on the max value passed in.
+ */
+static u32 ice_get_num_per_func(struct ice_hw *hw, u32 max)
+{
+ u8 funcs;
+
+#define ICE_CAPS_VALID_FUNCS_M 0xFF
+ funcs = hweight8(hw->dev_caps.common_cap.valid_functions &
+ ICE_CAPS_VALID_FUNCS_M);
+
+ if (!funcs)
+ return 0;
+
+ return max / funcs;
+}
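+
+/* Worked example (illustrative only): with a valid_functions bitmap of
+ * 0x0F (four PFs) and max = 768, ice_get_num_per_func() returns
+ * 768 / 4 = 192 resources per PF.
+ */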
+
+/**
+ * ice_parse_common_caps - parse common device/function capabilities
+ * @hw: pointer to the HW struct
+ * @caps: pointer to common capabilities structure
+ * @elem: the capability element to parse
+ * @prefix: message prefix for tracing capabilities
+ *
+ * Given a capability element, extract relevant details into the common
+ * capability structure.
+ *
+ * Returns: true if the capability matches one of the common capability ids,
+ * false otherwise.
+ */
+static bool
+ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
+ struct ice_aqc_list_caps_elem *elem, const char *prefix)
+{
+ u32 logical_id = le32_to_cpu(elem->logical_id);
+ u32 phys_id = le32_to_cpu(elem->phys_id);
+ u32 number = le32_to_cpu(elem->number);
+ u16 cap = le16_to_cpu(elem->cap);
+ bool found = true;
+
+ switch (cap) {
+ case ICE_AQC_CAPS_VALID_FUNCTIONS:
+ caps->valid_functions = number;
+ ice_debug(hw, ICE_DBG_INIT, "%s: valid_functions (bitmap) = %d\n", prefix,
+ caps->valid_functions);
+ break;
+ case ICE_AQC_CAPS_SRIOV:
+ caps->sr_iov_1_1 = (number == 1);
+ ice_debug(hw, ICE_DBG_INIT, "%s: sr_iov_1_1 = %d\n", prefix,
+ caps->sr_iov_1_1);
+ break;
+ case ICE_AQC_CAPS_DCB:
+ caps->dcb = (number == 1);
+ caps->active_tc_bitmap = logical_id;
+ caps->maxtc = phys_id;
+ ice_debug(hw, ICE_DBG_INIT, "%s: dcb = %d\n", prefix, caps->dcb);
+ ice_debug(hw, ICE_DBG_INIT, "%s: active_tc_bitmap = %d\n", prefix,
+ caps->active_tc_bitmap);
+ ice_debug(hw, ICE_DBG_INIT, "%s: maxtc = %d\n", prefix, caps->maxtc);
+ break;
+ case ICE_AQC_CAPS_RSS:
+ caps->rss_table_size = number;
+ caps->rss_table_entry_width = logical_id;
+ ice_debug(hw, ICE_DBG_INIT, "%s: rss_table_size = %d\n", prefix,
+ caps->rss_table_size);
+ ice_debug(hw, ICE_DBG_INIT, "%s: rss_table_entry_width = %d\n", prefix,
+ caps->rss_table_entry_width);
+ break;
+ case ICE_AQC_CAPS_RXQS:
+ caps->num_rxq = number;
+ caps->rxq_first_id = phys_id;
+ ice_debug(hw, ICE_DBG_INIT, "%s: num_rxq = %d\n", prefix,
+ caps->num_rxq);
+ ice_debug(hw, ICE_DBG_INIT, "%s: rxq_first_id = %d\n", prefix,
+ caps->rxq_first_id);
+ break;
+ case ICE_AQC_CAPS_TXQS:
+ caps->num_txq = number;
+ caps->txq_first_id = phys_id;
+ ice_debug(hw, ICE_DBG_INIT, "%s: num_txq = %d\n", prefix,
+ caps->num_txq);
+ ice_debug(hw, ICE_DBG_INIT, "%s: txq_first_id = %d\n", prefix,
+ caps->txq_first_id);
+ break;
+ case ICE_AQC_CAPS_MSIX:
+ caps->num_msix_vectors = number;
+ caps->msix_vector_first_id = phys_id;
+ ice_debug(hw, ICE_DBG_INIT, "%s: num_msix_vectors = %d\n", prefix,
+ caps->num_msix_vectors);
+ ice_debug(hw, ICE_DBG_INIT, "%s: msix_vector_first_id = %d\n", prefix,
+ caps->msix_vector_first_id);
+ break;
+ case ICE_AQC_CAPS_PENDING_NVM_VER:
+ caps->nvm_update_pending_nvm = true;
+ ice_debug(hw, ICE_DBG_INIT, "%s: update_pending_nvm\n", prefix);
+ break;
+ case ICE_AQC_CAPS_PENDING_OROM_VER:
+ caps->nvm_update_pending_orom = true;
+ ice_debug(hw, ICE_DBG_INIT, "%s: update_pending_orom\n", prefix);
+ break;
+ case ICE_AQC_CAPS_PENDING_NET_VER:
+ caps->nvm_update_pending_netlist = true;
+ ice_debug(hw, ICE_DBG_INIT, "%s: update_pending_netlist\n", prefix);
+ break;
+ case ICE_AQC_CAPS_NVM_MGMT:
+ caps->nvm_unified_update =
+ (number & ICE_NVM_MGMT_UNIFIED_UPD_SUPPORT) ?
+ true : false;
+ ice_debug(hw, ICE_DBG_INIT, "%s: nvm_unified_update = %d\n", prefix,
+ caps->nvm_unified_update);
+ break;
+ case ICE_AQC_CAPS_RDMA:
+ caps->rdma = (number == 1);
+ ice_debug(hw, ICE_DBG_INIT, "%s: rdma = %d\n", prefix, caps->rdma);
+ break;
+ case ICE_AQC_CAPS_MAX_MTU:
+ caps->max_mtu = number;
+ ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n",
+ prefix, caps->max_mtu);
+ break;
+ case ICE_AQC_CAPS_PCIE_RESET_AVOIDANCE:
+ caps->pcie_reset_avoidance = (number > 0);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: pcie_reset_avoidance = %d\n", prefix,
+ caps->pcie_reset_avoidance);
+ break;
+ case ICE_AQC_CAPS_POST_UPDATE_RESET_RESTRICT:
+ caps->reset_restrict_support = (number == 1);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: reset_restrict_support = %d\n", prefix,
+ caps->reset_restrict_support);
+ break;
+ default:
+ /* Not one of the recognized common capabilities */
+ found = false;
+ }
+
+ return found;
+}
+
+/**
+ * ice_recalc_port_limited_caps - Recalculate port limited capabilities
+ * @hw: pointer to the HW structure
+ * @caps: pointer to capabilities structure to fix
+ *
+ * Re-calculate the capabilities that are dependent on the number of physical
+ * ports; i.e. some features are not supported or function differently on
+ * devices with more than 4 ports.
+ */
+static void
+ice_recalc_port_limited_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps)
+{
+ /* This assumes device capabilities are always scanned before function
+ * capabilities during the initialization flow.
+ */
+ if (hw->dev_caps.num_funcs > 4) {
+ /* Max 4 TCs per port */
+ caps->maxtc = 4;
+ ice_debug(hw, ICE_DBG_INIT, "reducing maxtc to %d (based on #ports)\n",
+ caps->maxtc);
+ if (caps->rdma) {
+ ice_debug(hw, ICE_DBG_INIT, "forcing RDMA off\n");
+ caps->rdma = 0;
+ }
+
+ /* print message only when processing device capabilities
+ * during initialization.
+ */
+ if (caps == &hw->dev_caps.common_cap)
+ dev_info(ice_hw_to_dev(hw), "RDMA functionality is not available with the current device configuration.\n");
+ }
+}
+
+/**
+ * ice_parse_vf_func_caps - Parse ICE_AQC_CAPS_VF function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_VF.
+ */
+static void
+ice_parse_vf_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ u32 logical_id = le32_to_cpu(cap->logical_id);
+ u32 number = le32_to_cpu(cap->number);
+
+ func_p->num_allocd_vfs = number;
+ func_p->vf_base_id = logical_id;
+ ice_debug(hw, ICE_DBG_INIT, "func caps: num_allocd_vfs = %d\n",
+ func_p->num_allocd_vfs);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: vf_base_id = %d\n",
+ func_p->vf_base_id);
+}
+
+/**
+ * ice_parse_vsi_func_caps - Parse ICE_AQC_CAPS_VSI function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_VSI.
+ */
+static void
+ice_parse_vsi_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ func_p->guar_num_vsi = ice_get_num_per_func(hw, ICE_MAX_VSI);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: guar_num_vsi (fw) = %d\n",
+ le32_to_cpu(cap->number));
+ ice_debug(hw, ICE_DBG_INIT, "func caps: guar_num_vsi = %d\n",
+ func_p->guar_num_vsi);
+}
+
+/**
+ * ice_parse_1588_func_caps - Parse ICE_AQC_CAPS_1588 function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_1588.
+ */
+static void
+ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ struct ice_ts_func_info *info = &func_p->ts_func_info;
+ u32 number = le32_to_cpu(cap->number);
+
+ info->ena = ((number & ICE_TS_FUNC_ENA_M) != 0);
+ func_p->common_cap.ieee_1588 = info->ena;
+
+ info->src_tmr_owned = ((number & ICE_TS_SRC_TMR_OWND_M) != 0);
+ info->tmr_ena = ((number & ICE_TS_TMR_ENA_M) != 0);
+ info->tmr_index_owned = ((number & ICE_TS_TMR_IDX_OWND_M) != 0);
+ info->tmr_index_assoc = ((number & ICE_TS_TMR_IDX_ASSOC_M) != 0);
+
+ info->clk_freq = (number & ICE_TS_CLK_FREQ_M) >> ICE_TS_CLK_FREQ_S;
+ info->clk_src = ((number & ICE_TS_CLK_SRC_M) != 0);
+
+ if (info->clk_freq < NUM_ICE_TIME_REF_FREQ) {
+ info->time_ref = (enum ice_time_ref_freq)info->clk_freq;
+ } else {
+ /* Unknown clock frequency, so assume a (probably incorrect)
+		 * default to avoid out-of-bounds lookups of frequency-related
+		 * information.
+ */
+ ice_debug(hw, ICE_DBG_INIT, "1588 func caps: unknown clock frequency %u\n",
+ info->clk_freq);
+ info->time_ref = ICE_TIME_REF_FREQ_25_000;
+ }
+
+ ice_debug(hw, ICE_DBG_INIT, "func caps: ieee_1588 = %u\n",
+ func_p->common_cap.ieee_1588);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: src_tmr_owned = %u\n",
+ info->src_tmr_owned);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_ena = %u\n",
+ info->tmr_ena);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_index_owned = %u\n",
+ info->tmr_index_owned);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_index_assoc = %u\n",
+ info->tmr_index_assoc);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: clk_freq = %u\n",
+ info->clk_freq);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: clk_src = %u\n",
+ info->clk_src);
+}
+
+/**
+ * ice_parse_fdir_func_caps - Parse ICE_AQC_CAPS_FD function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_FD.
+ */
+static void
+ice_parse_fdir_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p)
+{
+ u32 reg_val, val;
+
+ reg_val = rd32(hw, GLQF_FD_SIZE);
+ val = (reg_val & GLQF_FD_SIZE_FD_GSIZE_M) >>
+ GLQF_FD_SIZE_FD_GSIZE_S;
+ func_p->fd_fltr_guar =
+ ice_get_num_per_func(hw, val);
+ val = (reg_val & GLQF_FD_SIZE_FD_BSIZE_M) >>
+ GLQF_FD_SIZE_FD_BSIZE_S;
+ func_p->fd_fltr_best_effort = val;
+
+ ice_debug(hw, ICE_DBG_INIT, "func caps: fd_fltr_guar = %d\n",
+ func_p->fd_fltr_guar);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: fd_fltr_best_effort = %d\n",
+ func_p->fd_fltr_best_effort);
+}
+
+/**
+ * ice_parse_func_caps - Parse function capabilities
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @buf: buffer containing the function capability records
+ * @cap_count: the number of capabilities
+ *
+ * Helper function to parse function (0x000A) capabilities list. For
+ * capabilities shared between device and function, this relies on
+ * ice_parse_common_caps.
+ *
+ * Loop through the list of provided capabilities and extract the relevant
+ * data into the function capabilities structure.
+ */
+static void
+ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+ void *buf, u32 cap_count)
+{
+ struct ice_aqc_list_caps_elem *cap_resp;
+ u32 i;
+
+ cap_resp = buf;
+
+ memset(func_p, 0, sizeof(*func_p));
+
+ for (i = 0; i < cap_count; i++) {
+ u16 cap = le16_to_cpu(cap_resp[i].cap);
+ bool found;
+
+ found = ice_parse_common_caps(hw, &func_p->common_cap,
+ &cap_resp[i], "func caps");
+
+ switch (cap) {
+ case ICE_AQC_CAPS_VF:
+ ice_parse_vf_func_caps(hw, func_p, &cap_resp[i]);
+ break;
+ case ICE_AQC_CAPS_VSI:
+ ice_parse_vsi_func_caps(hw, func_p, &cap_resp[i]);
+ break;
+ case ICE_AQC_CAPS_1588:
+ ice_parse_1588_func_caps(hw, func_p, &cap_resp[i]);
+ break;
+ case ICE_AQC_CAPS_FD:
+ ice_parse_fdir_func_caps(hw, func_p);
+ break;
+ default:
+ /* Don't list common capabilities as unknown */
+ if (!found)
+ ice_debug(hw, ICE_DBG_INIT, "func caps: unknown capability[%d]: 0x%x\n",
+ i, cap);
+ break;
+ }
+ }
+
+ ice_recalc_port_limited_caps(hw, &func_p->common_cap);
+}
+
+/**
+ * ice_parse_valid_functions_cap - Parse ICE_AQC_CAPS_VALID_FUNCTIONS caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VALID_FUNCTIONS for device capabilities.
+ */
+static void
+ice_parse_valid_functions_cap(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ u32 number = le32_to_cpu(cap->number);
+
+ dev_p->num_funcs = hweight32(number);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: num_funcs = %d\n",
+ dev_p->num_funcs);
+}
+
+/**
+ * ice_parse_vf_dev_caps - Parse ICE_AQC_CAPS_VF device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VF for device capabilities.
+ */
+static void
+ice_parse_vf_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ u32 number = le32_to_cpu(cap->number);
+
+ dev_p->num_vfs_exposed = number;
+ ice_debug(hw, ICE_DBG_INIT, "dev_caps: num_vfs_exposed = %d\n",
+ dev_p->num_vfs_exposed);
+}
+
+/**
+ * ice_parse_vsi_dev_caps - Parse ICE_AQC_CAPS_VSI device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VSI for device capabilities.
+ */
+static void
+ice_parse_vsi_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ u32 number = le32_to_cpu(cap->number);
+
+ dev_p->num_vsi_allocd_to_host = number;
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: num_vsi_allocd_to_host = %d\n",
+ dev_p->num_vsi_allocd_to_host);
+}
+
+/**
+ * ice_parse_1588_dev_caps - Parse ICE_AQC_CAPS_1588 device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_1588 for device capabilities.
+ */
+static void
+ice_parse_1588_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ struct ice_ts_dev_info *info = &dev_p->ts_dev_info;
+ u32 logical_id = le32_to_cpu(cap->logical_id);
+ u32 phys_id = le32_to_cpu(cap->phys_id);
+ u32 number = le32_to_cpu(cap->number);
+
+ info->ena = ((number & ICE_TS_DEV_ENA_M) != 0);
+ dev_p->common_cap.ieee_1588 = info->ena;
+
+ info->tmr0_owner = number & ICE_TS_TMR0_OWNR_M;
+ info->tmr0_owned = ((number & ICE_TS_TMR0_OWND_M) != 0);
+ info->tmr0_ena = ((number & ICE_TS_TMR0_ENA_M) != 0);
+
+ info->tmr1_owner = (number & ICE_TS_TMR1_OWNR_M) >> ICE_TS_TMR1_OWNR_S;
+ info->tmr1_owned = ((number & ICE_TS_TMR1_OWND_M) != 0);
+ info->tmr1_ena = ((number & ICE_TS_TMR1_ENA_M) != 0);
+
+ info->ts_ll_read = ((number & ICE_TS_LL_TX_TS_READ_M) != 0);
+
+ info->ena_ports = logical_id;
+ info->tmr_own_map = phys_id;
+
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: ieee_1588 = %u\n",
+ dev_p->common_cap.ieee_1588);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_owner = %u\n",
+ info->tmr0_owner);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_owned = %u\n",
+ info->tmr0_owned);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_ena = %u\n",
+ info->tmr0_ena);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_owner = %u\n",
+ info->tmr1_owner);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_owned = %u\n",
+ info->tmr1_owned);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_ena = %u\n",
+ info->tmr1_ena);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: ts_ll_read = %u\n",
+ info->ts_ll_read);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: ieee_1588 ena_ports = %u\n",
+ info->ena_ports);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr_own_map = %u\n",
+ info->tmr_own_map);
+}
+
+/**
+ * ice_parse_fdir_dev_caps - Parse ICE_AQC_CAPS_FD device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_FD for device capabilities.
+ */
+static void
+ice_parse_fdir_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ u32 number = le32_to_cpu(cap->number);
+
+ dev_p->num_flow_director_fltr = number;
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: num_flow_director_fltr = %d\n",
+ dev_p->num_flow_director_fltr);
+}
+
+/**
+ * ice_parse_dev_caps - Parse device capabilities
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @buf: buffer containing the device capability records
+ * @cap_count: the number of capabilities
+ *
+ * Helper function to parse the device (0x000B) capabilities list. For
+ * capabilities shared between device and function, this relies on
+ * ice_parse_common_caps.
+ *
+ * Loop through the list of provided capabilities and extract the relevant
+ * data into the device capabilities structure.
+ */
+static void
+ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ void *buf, u32 cap_count)
+{
+ struct ice_aqc_list_caps_elem *cap_resp;
+ u32 i;
+
+ cap_resp = buf;
+
+ memset(dev_p, 0, sizeof(*dev_p));
+
+ for (i = 0; i < cap_count; i++) {
+ u16 cap = le16_to_cpu(cap_resp[i].cap);
+ bool found;
+
+ found = ice_parse_common_caps(hw, &dev_p->common_cap,
+ &cap_resp[i], "dev caps");
+
+ switch (cap) {
+ case ICE_AQC_CAPS_VALID_FUNCTIONS:
+ ice_parse_valid_functions_cap(hw, dev_p, &cap_resp[i]);
+ break;
+ case ICE_AQC_CAPS_VF:
+ ice_parse_vf_dev_caps(hw, dev_p, &cap_resp[i]);
+ break;
+ case ICE_AQC_CAPS_VSI:
+ ice_parse_vsi_dev_caps(hw, dev_p, &cap_resp[i]);
+ break;
+ case ICE_AQC_CAPS_1588:
+ ice_parse_1588_dev_caps(hw, dev_p, &cap_resp[i]);
+ break;
+ case ICE_AQC_CAPS_FD:
+ ice_parse_fdir_dev_caps(hw, dev_p, &cap_resp[i]);
+ break;
+ default:
+ /* Don't list common capabilities as unknown */
+ if (!found)
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: unknown capability[%d]: 0x%x\n",
+ i, cap);
+ break;
+ }
+ }
+
+ ice_recalc_port_limited_caps(hw, &dev_p->common_cap);
+}
+
+/**
+ * ice_aq_list_caps - query function/device capabilities
+ * @hw: pointer to the HW struct
+ * @buf: a buffer to hold the capabilities
+ * @buf_size: size of the buffer
+ * @cap_count: if not NULL, set to the number of capabilities reported
+ * @opc: capabilities type to discover, device or function
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the function (0x000A) or device (0x000B) capabilities description from
+ * firmware and store it in the buffer.
+ *
+ * If the cap_count pointer is not NULL, then it is set to the number of
+ * capabilities firmware will report. Note that if the buffer size is too
+ * small, it is possible the command will return ICE_AQ_RC_ENOMEM. The
+ * cap_count will still be updated in this case. It is recommended that the
+ * buffer size be set to ICE_AQ_MAX_BUF_LEN (the largest possible buffer that
+ * firmware could return) to avoid this.
+ */
+int
+ice_aq_list_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
+ enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_list_caps *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ cmd = &desc.params.get_cap;
+
+ if (opc != ice_aqc_opc_list_func_caps &&
+ opc != ice_aqc_opc_list_dev_caps)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, opc);
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+
+ if (cap_count)
+ *cap_count = le32_to_cpu(cmd->count);
+
+ return status;
+}
+
+/**
+ * ice_discover_dev_caps - Read and extract device capabilities
+ * @hw: pointer to the hardware structure
+ * @dev_caps: pointer to device capabilities structure
+ *
+ * Read the device capabilities and extract them into the dev_caps structure
+ * for later use.
+ */
+int
+ice_discover_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_caps)
+{
+ u32 cap_count = 0;
+ void *cbuf;
+ int status;
+
+ cbuf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!cbuf)
+ return -ENOMEM;
+
+ /* Although the driver doesn't know the number of capabilities the
+ * device will return, we can simply send a 4KB buffer, the maximum
+ * possible size that firmware can return.
+ */
+ cap_count = ICE_AQ_MAX_BUF_LEN / sizeof(struct ice_aqc_list_caps_elem);
+
+ status = ice_aq_list_caps(hw, cbuf, ICE_AQ_MAX_BUF_LEN, &cap_count,
+ ice_aqc_opc_list_dev_caps, NULL);
+ if (!status)
+ ice_parse_dev_caps(hw, dev_caps, cbuf, cap_count);
+ kfree(cbuf);
+
+ return status;
+}
+
+/**
+ * ice_discover_func_caps - Read and extract function capabilities
+ * @hw: pointer to the hardware structure
+ * @func_caps: pointer to function capabilities structure
+ *
+ * Read the function capabilities and extract them into the func_caps structure
+ * for later use.
+ */
+static int
+ice_discover_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_caps)
+{
+ u32 cap_count = 0;
+ void *cbuf;
+ int status;
+
+ cbuf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!cbuf)
+ return -ENOMEM;
+
+ /* Although the driver doesn't know the number of capabilities the
+ * device will return, we can simply send a 4KB buffer, the maximum
+ * possible size that firmware can return.
+ */
+ cap_count = ICE_AQ_MAX_BUF_LEN / sizeof(struct ice_aqc_list_caps_elem);
+
+ status = ice_aq_list_caps(hw, cbuf, ICE_AQ_MAX_BUF_LEN, &cap_count,
+ ice_aqc_opc_list_func_caps, NULL);
+ if (!status)
+ ice_parse_func_caps(hw, func_caps, cbuf, cap_count);
+ kfree(cbuf);
+
+ return status;
+}
+
+/**
+ * ice_set_safe_mode_caps - Override dev/func capabilities when in safe mode
+ * @hw: pointer to the hardware structure
+ */
+void ice_set_safe_mode_caps(struct ice_hw *hw)
+{
+ struct ice_hw_func_caps *func_caps = &hw->func_caps;
+ struct ice_hw_dev_caps *dev_caps = &hw->dev_caps;
+ struct ice_hw_common_caps cached_caps;
+ u32 num_funcs;
+
+ /* cache some func_caps values that should be restored after memset */
+ cached_caps = func_caps->common_cap;
+
+ /* unset func capabilities */
+ memset(func_caps, 0, sizeof(*func_caps));
+
+#define ICE_RESTORE_FUNC_CAP(name) \
+ func_caps->common_cap.name = cached_caps.name
+
+ /* restore cached values */
+ ICE_RESTORE_FUNC_CAP(valid_functions);
+ ICE_RESTORE_FUNC_CAP(txq_first_id);
+ ICE_RESTORE_FUNC_CAP(rxq_first_id);
+ ICE_RESTORE_FUNC_CAP(msix_vector_first_id);
+ ICE_RESTORE_FUNC_CAP(max_mtu);
+ ICE_RESTORE_FUNC_CAP(nvm_unified_update);
+ ICE_RESTORE_FUNC_CAP(nvm_update_pending_nvm);
+ ICE_RESTORE_FUNC_CAP(nvm_update_pending_orom);
+ ICE_RESTORE_FUNC_CAP(nvm_update_pending_netlist);
+
+ /* one Tx and one Rx queue in safe mode */
+ func_caps->common_cap.num_rxq = 1;
+ func_caps->common_cap.num_txq = 1;
+
+ /* two MSIX vectors, one for traffic and one for misc causes */
+ func_caps->common_cap.num_msix_vectors = 2;
+ func_caps->guar_num_vsi = 1;
+
+ /* cache some dev_caps values that should be restored after memset */
+ cached_caps = dev_caps->common_cap;
+ num_funcs = dev_caps->num_funcs;
+
+ /* unset dev capabilities */
+ memset(dev_caps, 0, sizeof(*dev_caps));
+
+#define ICE_RESTORE_DEV_CAP(name) \
+ dev_caps->common_cap.name = cached_caps.name
+
+ /* restore cached values */
+ ICE_RESTORE_DEV_CAP(valid_functions);
+ ICE_RESTORE_DEV_CAP(txq_first_id);
+ ICE_RESTORE_DEV_CAP(rxq_first_id);
+ ICE_RESTORE_DEV_CAP(msix_vector_first_id);
+ ICE_RESTORE_DEV_CAP(max_mtu);
+ ICE_RESTORE_DEV_CAP(nvm_unified_update);
+ ICE_RESTORE_DEV_CAP(nvm_update_pending_nvm);
+ ICE_RESTORE_DEV_CAP(nvm_update_pending_orom);
+ ICE_RESTORE_DEV_CAP(nvm_update_pending_netlist);
+ dev_caps->num_funcs = num_funcs;
+
+ /* one Tx and one Rx queue per function in safe mode */
+ dev_caps->common_cap.num_rxq = num_funcs;
+ dev_caps->common_cap.num_txq = num_funcs;
+
+ /* two MSIX vectors per function */
+ dev_caps->common_cap.num_msix_vectors = 2 * num_funcs;
+}
+
+/**
+ * ice_get_caps - get info about the HW
+ * @hw: pointer to the hardware structure
+ */
+int ice_get_caps(struct ice_hw *hw)
+{
+ int status;
+
+ status = ice_discover_dev_caps(hw, &hw->dev_caps);
+ if (status)
+ return status;
+
+ return ice_discover_func_caps(hw, &hw->func_caps);
+}
+
+/**
+ * ice_aq_manage_mac_write - manage MAC address write command
+ * @hw: pointer to the HW struct
+ * @mac_addr: MAC address to be written as LAA/LAA+WoL/Port address
+ * @flags: flags to control write behavior
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function is used to write MAC address to the NVM (0x0108).
+ */
+int
+ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_manage_mac_write *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.mac_write;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_write);
+
+ cmd->flags = flags;
+ ether_addr_copy(cmd->mac_addr, mac_addr);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_clear_pxe_mode
+ * @hw: pointer to the HW struct
+ *
+ * Tell the firmware that the driver is taking over from PXE (0x0110).
+ */
+static int ice_aq_clear_pxe_mode(struct ice_hw *hw)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pxe_mode);
+ desc.params.clear_pxe.rx_cnt = ICE_AQC_CLEAR_PXE_RX_CNT;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_clear_pxe_mode - clear pxe operations mode
+ * @hw: pointer to the HW struct
+ *
+ * Make sure all PXE mode settings are cleared, including things
+ * like descriptor fetch/write-back mode.
+ */
+void ice_clear_pxe_mode(struct ice_hw *hw)
+{
+ if (ice_check_sq_alive(hw, &hw->adminq))
+ ice_aq_clear_pxe_mode(hw);
+}
+
+/**
+ * ice_aq_set_port_params - set physical port parameters.
+ * @pi: pointer to the port info struct
+ * @double_vlan: if set double VLAN is enabled
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set Physical port parameters (0x0203)
+ */
+int
+ice_aq_set_port_params(struct ice_port_info *pi, bool double_vlan,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_set_port_params *cmd;
+ struct ice_hw *hw = pi->hw;
+ struct ice_aq_desc desc;
+ u16 cmd_flags = 0;
+
+ cmd = &desc.params.set_port_params;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_params);
+ if (double_vlan)
+ cmd_flags |= ICE_AQC_SET_P_PARAMS_DOUBLE_VLAN_ENA;
+ cmd->cmd_flags = cpu_to_le16(cmd_flags);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_is_100m_speed_supported
+ * @hw: pointer to the HW struct
+ *
+ * Returns true if 100M speeds are supported by the device,
+ * false otherwise.
+ */
+bool ice_is_100m_speed_supported(struct ice_hw *hw)
+{
+ switch (hw->device_id) {
+ case ICE_DEV_ID_E822C_SGMII:
+ case ICE_DEV_ID_E822L_SGMII:
+ case ICE_DEV_ID_E823L_1GBE:
+ case ICE_DEV_ID_E823C_SGMII:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * ice_get_link_speed_based_on_phy_type - returns link speed
+ * @phy_type_low: lower part of phy_type
+ * @phy_type_high: higher part of phy_type
+ *
+ * This helper function will convert an entry in PHY type structure
+ * [phy_type_low, phy_type_high] to its corresponding link speed.
+ * Note: In the [phy_type_low, phy_type_high] structure, exactly one bit
+ * should be set, as this function converts one PHY type to its speed.
+ * If no bit is set, ICE_AQ_LINK_SPEED_UNKNOWN is returned.
+ * If more than one bit is set, ICE_AQ_LINK_SPEED_UNKNOWN is returned.
+ */
+static u16
+ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high)
+{
+ u16 speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
+ u16 speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN;
+
+ switch (phy_type_low) {
+ case ICE_PHY_TYPE_LOW_100BASE_TX:
+ case ICE_PHY_TYPE_LOW_100M_SGMII:
+ speed_phy_type_low = ICE_AQ_LINK_SPEED_100MB;
+ break;
+ case ICE_PHY_TYPE_LOW_1000BASE_T:
+ case ICE_PHY_TYPE_LOW_1000BASE_SX:
+ case ICE_PHY_TYPE_LOW_1000BASE_LX:
+ case ICE_PHY_TYPE_LOW_1000BASE_KX:
+ case ICE_PHY_TYPE_LOW_1G_SGMII:
+ speed_phy_type_low = ICE_AQ_LINK_SPEED_1000MB;
+ break;
+ case ICE_PHY_TYPE_LOW_2500BASE_T:
+ case ICE_PHY_TYPE_LOW_2500BASE_X:
+ case ICE_PHY_TYPE_LOW_2500BASE_KX:
+ speed_phy_type_low = ICE_AQ_LINK_SPEED_2500MB;
+ break;
+ case ICE_PHY_TYPE_LOW_5GBASE_T:
+ case ICE_PHY_TYPE_LOW_5GBASE_KR:
+ speed_phy_type_low = ICE_AQ_LINK_SPEED_5GB;
+ break;
+ case ICE_PHY_TYPE_LOW_10GBASE_T:
+ case ICE_PHY_TYPE_LOW_10G_SFI_DA:
+ case ICE_PHY_TYPE_LOW_10GBASE_SR:
+ case ICE_PHY_TYPE_LOW_10GBASE_LR:
+ case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
+ case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
+ speed_phy_type_low = ICE_AQ_LINK_SPEED_10GB;
+ break;
+ case ICE_PHY_TYPE_LOW_25GBASE_T:
+ case ICE_PHY_TYPE_LOW_25GBASE_CR:
+ case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
+ case ICE_PHY_TYPE_LOW_25GBASE_CR1:
+ case ICE_PHY_TYPE_LOW_25GBASE_SR:
+ case ICE_PHY_TYPE_LOW_25GBASE_LR:
+ case ICE_PHY_TYPE_LOW_25GBASE_KR:
+ case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
+ case ICE_PHY_TYPE_LOW_25GBASE_KR1:
+ case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+ speed_phy_type_low = ICE_AQ_LINK_SPEED_25GB;
+ break;
+ case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+ case ICE_PHY_TYPE_LOW_40GBASE_SR4:
+ case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+ case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+ case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_40G_XLAUI:
+ speed_phy_type_low = ICE_AQ_LINK_SPEED_40GB;
+ break;
+ case ICE_PHY_TYPE_LOW_50GBASE_CR2:
+ case ICE_PHY_TYPE_LOW_50GBASE_SR2:
+ case ICE_PHY_TYPE_LOW_50GBASE_LR2:
+ case ICE_PHY_TYPE_LOW_50GBASE_KR2:
+ case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_50G_LAUI2:
+ case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_50G_AUI2:
+ case ICE_PHY_TYPE_LOW_50GBASE_CP:
+ case ICE_PHY_TYPE_LOW_50GBASE_SR:
+ case ICE_PHY_TYPE_LOW_50GBASE_FR:
+ case ICE_PHY_TYPE_LOW_50GBASE_LR:
+ case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
+ case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_50G_AUI1:
+ speed_phy_type_low = ICE_AQ_LINK_SPEED_50GB;
+ break;
+ case ICE_PHY_TYPE_LOW_100GBASE_CR4:
+ case ICE_PHY_TYPE_LOW_100GBASE_SR4:
+ case ICE_PHY_TYPE_LOW_100GBASE_LR4:
+ case ICE_PHY_TYPE_LOW_100GBASE_KR4:
+ case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_100G_CAUI4:
+ case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_100G_AUI4:
+ case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
+ case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
+ case ICE_PHY_TYPE_LOW_100GBASE_CP2:
+ case ICE_PHY_TYPE_LOW_100GBASE_SR2:
+ case ICE_PHY_TYPE_LOW_100GBASE_DR:
+ speed_phy_type_low = ICE_AQ_LINK_SPEED_100GB;
+ break;
+ default:
+ speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN;
+ break;
+ }
+
+ switch (phy_type_high) {
+ case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
+ case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
+ case ICE_PHY_TYPE_HIGH_100G_CAUI2:
+ case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
+ case ICE_PHY_TYPE_HIGH_100G_AUI2:
+ speed_phy_type_high = ICE_AQ_LINK_SPEED_100GB;
+ break;
+ default:
+ speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
+ break;
+ }
+
+ if (speed_phy_type_low == ICE_AQ_LINK_SPEED_UNKNOWN &&
+ speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN)
+ return ICE_AQ_LINK_SPEED_UNKNOWN;
+ else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN &&
+ speed_phy_type_high != ICE_AQ_LINK_SPEED_UNKNOWN)
+ return ICE_AQ_LINK_SPEED_UNKNOWN;
+ else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN &&
+ speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN)
+ return speed_phy_type_low;
+ else
+ return speed_phy_type_high;
+}
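+
+/* Example (illustrative only): exactly one PHY type bit maps to a speed,
+ * e.g.:
+ *
+ *	ice_get_link_speed_based_on_phy_type(ICE_PHY_TYPE_LOW_25GBASE_SR, 0)
+ *		returns ICE_AQ_LINK_SPEED_25GB
+ *
+ * while passing bits in both halves (or none at all) returns
+ * ICE_AQ_LINK_SPEED_UNKNOWN.
+ */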
+
+/**
+ * ice_update_phy_type
+ * @phy_type_low: pointer to the lower part of phy_type
+ * @phy_type_high: pointer to the higher part of phy_type
+ * @link_speeds_bitmap: targeted link speeds bitmap
+ *
+ * Note: For the link_speeds_bitmap layout, see
+ * [ice_aqc_get_link_status->link_speed]. The caller can pass in a
+ * link_speeds_bitmap that includes multiple speeds.
+ *
+ * Each entry in the [phy_type_low, phy_type_high] structure represents a
+ * certain link speed. This helper function turns on bits in
+ * [phy_type_low, phy_type_high] based on the value of the
+ * link_speeds_bitmap input parameter.
+ */
+void
+ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
+ u16 link_speeds_bitmap)
+{
+ u64 pt_high;
+ u64 pt_low;
+ int index;
+ u16 speed;
+
+ /* We first check with low part of phy_type */
+ for (index = 0; index <= ICE_PHY_TYPE_LOW_MAX_INDEX; index++) {
+ pt_low = BIT_ULL(index);
+ speed = ice_get_link_speed_based_on_phy_type(pt_low, 0);
+
+ if (link_speeds_bitmap & speed)
+ *phy_type_low |= BIT_ULL(index);
+ }
+
+ /* We then check with high part of phy_type */
+ for (index = 0; index <= ICE_PHY_TYPE_HIGH_MAX_INDEX; index++) {
+ pt_high = BIT_ULL(index);
+ speed = ice_get_link_speed_based_on_phy_type(0, pt_high);
+
+ if (link_speeds_bitmap & speed)
+ *phy_type_high |= BIT_ULL(index);
+ }
+}
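+
+/* Usage sketch (illustrative only): to advertise only 10G and 25G, build
+ * the PHY type masks from the requested speed bitmap before issuing a set
+ * PHY config command:
+ *
+ *	u64 phy_low = 0, phy_high = 0;
+ *
+ *	ice_update_phy_type(&phy_low, &phy_high,
+ *			    ICE_AQ_LINK_SPEED_10GB | ICE_AQ_LINK_SPEED_25GB);
+ */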
+
+/**
+ * ice_aq_set_phy_cfg
+ * @hw: pointer to the HW struct
+ * @pi: port info structure of the interested logical port
+ * @cfg: structure with PHY configuration data to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the various PHY configuration parameters supported on the Port.
+ * One or more of the Set PHY config parameters may be ignored in an MFP
+ * mode as the PF may not have the privilege to set some of the PHY Config
+ * parameters. This status will be indicated by the command response (0x0601).
+ */
+int
+ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi,
+ struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+ int status;
+
+ if (!cfg)
+ return -EINVAL;
+
+ /* Ensure that only valid bits of cfg->caps can be turned on. */
+ if (cfg->caps & ~ICE_AQ_PHY_ENA_VALID_MASK) {
+ ice_debug(hw, ICE_DBG_PHY, "Invalid bit is set in ice_aqc_set_phy_cfg_data->caps : 0x%x\n",
+ cfg->caps);
+
+ cfg->caps &= ICE_AQ_PHY_ENA_VALID_MASK;
+ }
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_cfg);
+ desc.params.set_phy.lport_num = pi->lport;
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ ice_debug(hw, ICE_DBG_LINK, "set phy cfg\n");
+ ice_debug(hw, ICE_DBG_LINK, " phy_type_low = 0x%llx\n",
+ (unsigned long long)le64_to_cpu(cfg->phy_type_low));
+ ice_debug(hw, ICE_DBG_LINK, " phy_type_high = 0x%llx\n",
+ (unsigned long long)le64_to_cpu(cfg->phy_type_high));
+ ice_debug(hw, ICE_DBG_LINK, " caps = 0x%x\n", cfg->caps);
+ ice_debug(hw, ICE_DBG_LINK, " low_power_ctrl_an = 0x%x\n",
+ cfg->low_power_ctrl_an);
+ ice_debug(hw, ICE_DBG_LINK, " eee_cap = 0x%x\n", cfg->eee_cap);
+ ice_debug(hw, ICE_DBG_LINK, " eeer_value = 0x%x\n", cfg->eeer_value);
+ ice_debug(hw, ICE_DBG_LINK, " link_fec_opt = 0x%x\n",
+ cfg->link_fec_opt);
+
+ status = ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd);
+ if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
+ status = 0;
+
+ if (!status)
+ pi->phy.curr_user_phy_cfg = *cfg;
+
+ return status;
+}
+
+/**
+ * ice_update_link_info - update status of the HW network link
+ * @pi: port info structure of the interested logical port
+ */
+int ice_update_link_info(struct ice_port_info *pi)
+{
+ struct ice_link_status *li;
+ int status;
+
+ if (!pi)
+ return -EINVAL;
+
+ li = &pi->phy.link_info;
+
+ status = ice_aq_get_link_info(pi, true, NULL, NULL);
+ if (status)
+ return status;
+
+ if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_hw *hw;
+
+ hw = pi->hw;
+ pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps),
+ GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+ pcaps, NULL);
+
+ devm_kfree(ice_hw_to_dev(hw), pcaps);
+ }
+
+ return status;
+}
+
+/**
+ * ice_cache_phy_user_req
+ * @pi: port information structure
+ * @cache_data: PHY logging data
+ * @cache_mode: PHY logging mode
+ *
+ * Log the user request on (FC, FEC, SPEED) for later use.
+ */
+static void
+ice_cache_phy_user_req(struct ice_port_info *pi,
+ struct ice_phy_cache_mode_data cache_data,
+ enum ice_phy_cache_mode cache_mode)
+{
+ if (!pi)
+ return;
+
+ switch (cache_mode) {
+ case ICE_FC_MODE:
+ pi->phy.curr_user_fc_req = cache_data.data.curr_user_fc_req;
+ break;
+ case ICE_SPEED_MODE:
+ pi->phy.curr_user_speed_req =
+ cache_data.data.curr_user_speed_req;
+ break;
+ case ICE_FEC_MODE:
+ pi->phy.curr_user_fec_req = cache_data.data.curr_user_fec_req;
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * ice_caps_to_fc_mode
+ * @caps: PHY capabilities
+ *
+ * Convert PHY FC capabilities to ice FC mode
+ */
+enum ice_fc_mode ice_caps_to_fc_mode(u8 caps)
+{
+ if (caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE &&
+ caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+ return ICE_FC_FULL;
+
+ if (caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
+ return ICE_FC_TX_PAUSE;
+
+ if (caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+ return ICE_FC_RX_PAUSE;
+
+ return ICE_FC_NONE;
+}
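+
+/* Example (illustrative only): a caps byte with both pause bits set decodes
+ * to full flow control:
+ *
+ *	ice_caps_to_fc_mode(ICE_AQC_PHY_EN_TX_LINK_PAUSE |
+ *			    ICE_AQC_PHY_EN_RX_LINK_PAUSE) == ICE_FC_FULL
+ */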
+
+/**
+ * ice_caps_to_fec_mode
+ * @caps: PHY capabilities
+ * @fec_options: Link FEC options
+ *
+ * Convert PHY FEC capabilities to ice FEC mode
+ */
+enum ice_fec_mode ice_caps_to_fec_mode(u8 caps, u8 fec_options)
+{
+ if (caps & ICE_AQC_PHY_EN_AUTO_FEC)
+ return ICE_FEC_AUTO;
+
+ if (fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
+ ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
+ ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN |
+ ICE_AQC_PHY_FEC_25G_KR_REQ))
+ return ICE_FEC_BASER;
+
+ if (fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
+ ICE_AQC_PHY_FEC_25G_RS_544_REQ |
+ ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN))
+ return ICE_FEC_RS;
+
+ return ICE_FEC_NONE;
+}
+
+/**
+ * ice_cfg_phy_fc - Configure PHY FC data based on FC mode
+ * @pi: port information structure
+ * @cfg: PHY configuration data to set FC mode
+ * @req_mode: FC mode to configure
+ */
+int
+ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+ enum ice_fc_mode req_mode)
+{
+ struct ice_phy_cache_mode_data cache_data;
+ u8 pause_mask = 0x0;
+
+ if (!pi || !cfg)
+ return -EINVAL;
+
+ switch (req_mode) {
+ case ICE_FC_FULL:
+ pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
+ pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
+ break;
+ case ICE_FC_RX_PAUSE:
+ pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
+ break;
+ case ICE_FC_TX_PAUSE:
+ pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
+ break;
+ default:
+ break;
+ }
+
+ /* clear the old pause settings */
+ cfg->caps &= ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE |
+ ICE_AQC_PHY_EN_RX_LINK_PAUSE);
+
+ /* set the new capabilities */
+ cfg->caps |= pause_mask;
+
+ /* Cache user FC request */
+ cache_data.data.curr_user_fc_req = req_mode;
+ ice_cache_phy_user_req(pi, cache_data, ICE_FC_MODE);
+
+ return 0;
+}
+
+/**
+ * ice_set_fc
+ * @pi: port information structure
+ * @aq_failures: pointer to status code, specific to ice_set_fc routine
+ * @ena_auto_link_update: enable automatic link update
+ *
+ * Set the requested flow control mode.
+ */
+int
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
+{
+ struct ice_aqc_set_phy_cfg_data cfg = { 0 };
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_hw *hw;
+ int status;
+
+ if (!pi || !aq_failures)
+ return -EINVAL;
+
+ *aq_failures = 0;
+ hw = pi->hw;
+
+ pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ /* Get the current PHY config */
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
+ pcaps, NULL);
+ if (status) {
+ *aq_failures = ICE_SET_FC_AQ_FAIL_GET;
+ goto out;
+ }
+
+ ice_copy_phy_caps_to_cfg(pi, pcaps, &cfg);
+
+ /* Configure the set PHY data */
+ status = ice_cfg_phy_fc(pi, &cfg, pi->fc.req_mode);
+ if (status)
+ goto out;
+
+ /* If the capabilities have changed, then set the new config */
+ if (cfg.caps != pcaps->caps) {
+ int retry_count, retry_max = 10;
+
+ /* Auto restart link so settings take effect */
+ if (ena_auto_link_update)
+ cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+ status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
+ if (status) {
+ *aq_failures = ICE_SET_FC_AQ_FAIL_SET;
+ goto out;
+ }
+
+ /* Update the link info
+ * It sometimes takes a really long time for link to
+ * come back from the atomic reset. Thus, we wait a
+ * little bit.
+ */
+ for (retry_count = 0; retry_count < retry_max; retry_count++) {
+ status = ice_update_link_info(pi);
+
+ if (!status)
+ break;
+
+ mdelay(100);
+ }
+
+ if (status)
+ *aq_failures = ICE_SET_FC_AQ_FAIL_UPDATE;
+ }
+
+out:
+ devm_kfree(ice_hw_to_dev(hw), pcaps);
+ return status;
+}
+
+/**
+ * ice_phy_caps_equals_cfg
+ * @phy_caps: PHY capabilities
+ * @phy_cfg: PHY configuration
+ *
+ * Helper function to determine if the PHY capabilities match the PHY
+ * configuration
+ */
+bool
+ice_phy_caps_equals_cfg(struct ice_aqc_get_phy_caps_data *phy_caps,
+ struct ice_aqc_set_phy_cfg_data *phy_cfg)
+{
+ u8 caps_mask, cfg_mask;
+
+ if (!phy_caps || !phy_cfg)
+ return false;
+
+ /* These bits are not common between capabilities and configuration.
+ * Do not use them to determine equality.
+ */
+ caps_mask = ICE_AQC_PHY_CAPS_MASK & ~(ICE_AQC_PHY_AN_MODE |
+ ICE_AQC_GET_PHY_EN_MOD_QUAL);
+ cfg_mask = ICE_AQ_PHY_ENA_VALID_MASK & ~ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+ if (phy_caps->phy_type_low != phy_cfg->phy_type_low ||
+ phy_caps->phy_type_high != phy_cfg->phy_type_high ||
+ ((phy_caps->caps & caps_mask) != (phy_cfg->caps & cfg_mask)) ||
+ phy_caps->low_power_ctrl_an != phy_cfg->low_power_ctrl_an ||
+ phy_caps->eee_cap != phy_cfg->eee_cap ||
+ phy_caps->eeer_value != phy_cfg->eeer_value ||
+ phy_caps->link_fec_options != phy_cfg->link_fec_opt)
+ return false;
+
+ return true;
+}
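+
+/* Usage sketch (illustrative only): callers commonly use this check to skip
+ * a redundant Set PHY config command when nothing would change:
+ *
+ *	ice_copy_phy_caps_to_cfg(pi, pcaps, &cfg);
+ *	if (!ice_phy_caps_equals_cfg(pcaps, &cfg))
+ *		status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
+ */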
+
+/**
+ * ice_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data
+ * @pi: port information structure
+ * @caps: PHY ability structure to copy data from
+ * @cfg: PHY configuration structure to copy data to
+ *
+ * Helper function to copy AQC PHY get ability data to PHY set configuration
+ * data structure.
+ */
+void
+ice_copy_phy_caps_to_cfg(struct ice_port_info *pi,
+ struct ice_aqc_get_phy_caps_data *caps,
+ struct ice_aqc_set_phy_cfg_data *cfg)
+{
+ if (!pi || !caps || !cfg)
+ return;
+
+ memset(cfg, 0, sizeof(*cfg));
+ cfg->phy_type_low = caps->phy_type_low;
+ cfg->phy_type_high = caps->phy_type_high;
+ cfg->caps = caps->caps;
+ cfg->low_power_ctrl_an = caps->low_power_ctrl_an;
+ cfg->eee_cap = caps->eee_cap;
+ cfg->eeer_value = caps->eeer_value;
+ cfg->link_fec_opt = caps->link_fec_options;
+ cfg->module_compliance_enforcement =
+ caps->module_compliance_enforcement;
+}
+
+/**
+ * ice_cfg_phy_fec - Configure PHY FEC data based on FEC mode
+ * @pi: port information structure
+ * @cfg: PHY configuration data to set FEC mode
+ * @fec: FEC mode to configure
+ */
+int
+ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+ enum ice_fec_mode fec)
+{
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_hw *hw;
+ int status;
+
+ if (!pi || !cfg)
+ return -EINVAL;
+
+ hw = pi->hw;
+
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ status = ice_aq_get_phy_caps(pi, false,
+ (ice_fw_supports_report_dflt_cfg(hw) ?
+ ICE_AQC_REPORT_DFLT_CFG :
+ ICE_AQC_REPORT_TOPO_CAP_MEDIA), pcaps, NULL);
+ if (status)
+ goto out;
+
+ cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
+ cfg->link_fec_opt = pcaps->link_fec_options;
+
+ switch (fec) {
+ case ICE_FEC_BASER:
+		/* Clear the RS bits; AND in the BASE-R ability
+		 * bits and OR in the request bits.
+ */
+ cfg->link_fec_opt &= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
+ ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN;
+ cfg->link_fec_opt |= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
+ ICE_AQC_PHY_FEC_25G_KR_REQ;
+ break;
+ case ICE_FEC_RS:
+		/* Clear the BASE-R bits; AND in the RS ability
+		 * bits and OR in the request bits.
+ */
+ cfg->link_fec_opt &= ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN;
+ cfg->link_fec_opt |= ICE_AQC_PHY_FEC_25G_RS_528_REQ |
+ ICE_AQC_PHY_FEC_25G_RS_544_REQ;
+ break;
+ case ICE_FEC_NONE:
+ /* Clear all FEC option bits. */
+ cfg->link_fec_opt &= ~ICE_AQC_PHY_FEC_MASK;
+ break;
+ case ICE_FEC_AUTO:
+ /* AND auto FEC bit, and all caps bits. */
+ cfg->caps &= ICE_AQC_PHY_CAPS_MASK;
+ cfg->link_fec_opt |= pcaps->link_fec_options;
+ break;
+ default:
+ status = -EINVAL;
+ break;
+ }
+
+ if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) &&
+ !ice_fw_supports_report_dflt_cfg(hw)) {
+ struct ice_link_default_override_tlv tlv = { 0 };
+
+ status = ice_get_link_default_override(&tlv, pi);
+ if (status)
+ goto out;
+
+ if (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) &&
+ (tlv.options & ICE_LINK_OVERRIDE_EN))
+ cfg->link_fec_opt = tlv.fec_options;
+ }
+
+out:
+ kfree(pcaps);
+
+ return status;
+}
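+
+/* Usage sketch (illustrative only): requesting RS FEC typically follows a
+ * get-caps / copy / configure / set sequence:
+ *
+ *	ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL);
+ *	ice_copy_phy_caps_to_cfg(pi, pcaps, &cfg);
+ *	if (!ice_cfg_phy_fec(pi, &cfg, ICE_FEC_RS))
+ *		ice_aq_set_phy_cfg(pi->hw, pi, &cfg, NULL);
+ */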
+
+/**
+ * ice_get_link_status - get status of the HW network link
+ * @pi: port information structure
+ * @link_up: pointer to bool (true/false = linkup/linkdown)
+ *
+ * Variable link_up is true if the link is up, false if it is down. The
+ * value of link_up is invalid if the return status is non-zero. As a
+ * result of this call, link status reporting becomes enabled.
+ */
+int ice_get_link_status(struct ice_port_info *pi, bool *link_up)
+{
+ struct ice_phy_info *phy_info;
+ int status = 0;
+
+ if (!pi || !link_up)
+ return -EINVAL;
+
+ phy_info = &pi->phy;
+
+ if (phy_info->get_link_info) {
+ status = ice_update_link_info(pi);
+
+ if (status)
+ ice_debug(pi->hw, ICE_DBG_LINK, "get link status error, status = %d\n",
+ status);
+ }
+
+ *link_up = phy_info->link_info.link_info & ICE_AQ_LINK_UP;
+
+ return status;
+}
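+
+/* Usage sketch (illustrative only); handle_link_up() is a hypothetical
+ * helper:
+ *
+ *	bool link_up;
+ *
+ *	if (!ice_get_link_status(pi, &link_up) && link_up)
+ *		handle_link_up(pi);
+ */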
+
+/**
+ * ice_aq_set_link_restart_an
+ * @pi: pointer to the port information structure
+ * @ena_link: if true: enable link, if false: disable link
+ * @cd: pointer to command details structure or NULL
+ *
+ * Sets up the link and restarts the Auto-Negotiation over the link.
+ */
+int
+ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_restart_an *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.restart_an;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_restart_an);
+
+ cmd->cmd_flags = ICE_AQC_RESTART_AN_LINK_RESTART;
+ cmd->lport_num = pi->lport;
+ if (ena_link)
+ cmd->cmd_flags |= ICE_AQC_RESTART_AN_LINK_ENABLE;
+ else
+ cmd->cmd_flags &= ~ICE_AQC_RESTART_AN_LINK_ENABLE;
+
+ return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_event_mask
+ * @hw: pointer to the HW struct
+ * @port_num: port number of the physical function
+ * @mask: event mask to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set event mask (0x0613)
+ */
+int
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_set_event_mask *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.set_event_mask;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask);
+
+ cmd->lport_num = port_num;
+
+ cmd->event_mask = cpu_to_le16(mask);
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
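+
+/* Usage sketch (illustrative only, assuming set mask bits suppress the
+ * corresponding events, as in the driver's init path): report only link
+ * up/down events by masking out everything else:
+ *
+ *	u16 mask = ~((u16)ICE_AQ_LINK_EVENT_UPDOWN);
+ *
+ *	ice_aq_set_event_mask(hw, pi->lport, mask, NULL);
+ */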
+
+/**
+ * ice_aq_set_mac_loopback
+ * @hw: pointer to the HW struct
+ * @ena_lpbk: Enable or Disable loopback
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable/disable loopback on a given port
+ */
+int
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_set_mac_lb *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.set_mac_lb;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_lb);
+ if (ena_lpbk)
+ cmd->lb_mode = ICE_AQ_MAC_LB_EN;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_port_id_led
+ * @pi: pointer to the port information
+ * @is_orig_mode: is this LED set to original mode (by the net-list)
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set LED value for the given port (0x06e9)
+ */
+int
+ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_set_port_id_led *cmd;
+ struct ice_hw *hw = pi->hw;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.set_port_id_led;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_id_led);
+
+ if (is_orig_mode)
+ cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_ORIG;
+ else
+ cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_BLINK;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_get_port_options
+ * @hw: pointer to the HW struct
+ * @options: buffer for the resultant port options
+ * @option_count: input - size of the buffer in port options structures,
+ * output - number of returned port options
+ * @lport: logical port to call the command with (optional)
+ * @lport_valid: when false, FW uses the port owned by the PF instead of
+ *               lport; must be true when the PF owns more than one port
+ * @active_option_idx: index of active port option in returned buffer
+ * @active_option_valid: active option in returned buffer is valid
+ * @pending_option_idx: index of pending port option in returned buffer
+ * @pending_option_valid: pending option in returned buffer is valid
+ *
+ * Calls Get Port Options AQC (0x06ea) and verifies result.
+ */
+int
+ice_aq_get_port_options(struct ice_hw *hw,
+ struct ice_aqc_get_port_options_elem *options,
+ u8 *option_count, u8 lport, bool lport_valid,
+ u8 *active_option_idx, bool *active_option_valid,
+ u8 *pending_option_idx, bool *pending_option_valid)
+{
+ struct ice_aqc_get_port_options *cmd;
+ struct ice_aq_desc desc;
+ int status;
+ u8 i;
+
+ /* options buffer shall be able to hold max returned options */
+ if (*option_count < ICE_AQC_PORT_OPT_COUNT_M)
+ return -EINVAL;
+
+ cmd = &desc.params.get_port_options;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_port_options);
+
+ if (lport_valid)
+ cmd->lport_num = lport;
+ cmd->lport_num_valid = lport_valid;
+
+ status = ice_aq_send_cmd(hw, &desc, options,
+ *option_count * sizeof(*options), NULL);
+ if (status)
+ return status;
+
+ /* verify direct FW response & set output parameters */
+ *option_count = FIELD_GET(ICE_AQC_PORT_OPT_COUNT_M,
+ cmd->port_options_count);
+ ice_debug(hw, ICE_DBG_PHY, "options: %x\n", *option_count);
+ *active_option_valid = FIELD_GET(ICE_AQC_PORT_OPT_VALID,
+ cmd->port_options);
+ if (*active_option_valid) {
+ *active_option_idx = FIELD_GET(ICE_AQC_PORT_OPT_ACTIVE_M,
+ cmd->port_options);
+ if (*active_option_idx > (*option_count - 1))
+ return -EIO;
+ ice_debug(hw, ICE_DBG_PHY, "active idx: %x\n",
+ *active_option_idx);
+ }
+
+ *pending_option_valid = FIELD_GET(ICE_AQC_PENDING_PORT_OPT_VALID,
+ cmd->pending_port_option_status);
+ if (*pending_option_valid) {
+ *pending_option_idx = FIELD_GET(ICE_AQC_PENDING_PORT_OPT_IDX_M,
+ cmd->pending_port_option_status);
+ if (*pending_option_idx > (*option_count - 1))
+ return -EIO;
+ ice_debug(hw, ICE_DBG_PHY, "pending idx: %x\n",
+ *pending_option_idx);
+ }
+
+ /* mask output options fields */
+ for (i = 0; i < *option_count; i++) {
+ options[i].pmd = FIELD_GET(ICE_AQC_PORT_OPT_PMD_COUNT_M,
+ options[i].pmd);
+ options[i].max_lane_speed = FIELD_GET(ICE_AQC_PORT_OPT_MAX_LANE_M,
+ options[i].max_lane_speed);
+ ice_debug(hw, ICE_DBG_PHY, "pmds: %x max speed: %x\n",
+ options[i].pmd, options[i].max_lane_speed);
+ }
+
+ return 0;
+}
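+
+/* Usage sketch (illustrative only); ICE_AQC_PORT_OPT_MAX is assumed to be
+ * the buffer bound from the AQ definitions:
+ *
+ *	struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX];
+ *	u8 count = ICE_AQC_PORT_OPT_MAX, active_idx, pending_idx;
+ *	bool active_valid, pending_valid;
+ *
+ *	status = ice_aq_get_port_options(hw, options, &count, 0, false,
+ *					 &active_idx, &active_valid,
+ *					 &pending_idx, &pending_valid);
+ */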
+
+/**
+ * ice_aq_set_port_option
+ * @hw: pointer to the HW struct
+ * @lport: logical port to call the command with
+ * @lport_valid: when false, FW uses the port owned by the PF instead of
+ *               lport; must be true when the PF owns more than one port
+ * @new_option: new port option to be written
+ *
+ * Calls Set Port Options AQC (0x06eb).
+ */
+int
+ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid,
+ u8 new_option)
+{
+ struct ice_aqc_set_port_option *cmd;
+ struct ice_aq_desc desc;
+
+ if (new_option > ICE_AQC_PORT_OPT_COUNT_M)
+ return -EINVAL;
+
+ cmd = &desc.params.set_port_option;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_option);
+
+ if (lport_valid)
+ cmd->lport_num = lport;
+
+ cmd->lport_num_valid = lport_valid;
+ cmd->selected_port_option = new_option;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
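+/* Usage sketch (hypothetical helper, for illustration only): a minimal
+ * caller of the two wrappers above. It assumes the PF owns a single port
+ * (so lport_valid is false) and that new_idx was chosen by the caller.
+ */
+static int __maybe_unused
+ice_example_select_port_option(struct ice_hw *hw, u8 new_idx)
+{
+ struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_COUNT_M];
+ u8 count = ICE_AQC_PORT_OPT_COUNT_M;
+ u8 active_idx, pending_idx;
+ bool active_valid, pending_valid;
+ int err;
+
+ /* the buffer must be able to hold the maximum number of options */
+ err = ice_aq_get_port_options(hw, options, &count, 0, false,
+ &active_idx, &active_valid,
+ &pending_idx, &pending_valid);
+ if (err)
+ return err;
+
+ if (new_idx >= count)
+ return -EINVAL;
+
+ return ice_aq_set_port_option(hw, 0, false, new_idx);
+}
+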
+/**
+ * ice_aq_sff_eeprom
+ * @hw: pointer to the HW struct
+ * @lport: bits [7:0] = logical port, bit [8] = logical port valid
+ * @bus_addr: I2C bus address of the eeprom (typically 0xA0, 0=topo default)
+ * @mem_addr: I2C offset; lower 8 bits hold the address, upper 8 bits must be zero.
+ * @page: QSFP page
+ * @set_page: set or ignore the page
+ * @data: pointer to data buffer to be read/written to the I2C device.
+ * @length: 1-16 for read, 1 for write.
+ * @write: 0 for read, 1 for write.
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read/Write SFF EEPROM (0x06EE)
+ */
+int
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+ u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+ bool write, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_sff_eeprom *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ if (!data || (mem_addr & 0xff00))
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom);
+ cmd = &desc.params.read_write_sff_param;
+ desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD);
+ cmd->lport_num = (u8)(lport & 0xff);
+ cmd->lport_num_valid = (u8)((lport >> 8) & 0x01);
+ cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) &
+ ICE_AQC_SFF_I2CBUS_7BIT_M) |
+ ((set_page <<
+ ICE_AQC_SFF_SET_EEPROM_PAGE_S) &
+ ICE_AQC_SFF_SET_EEPROM_PAGE_M));
+ cmd->i2c_mem_addr = cpu_to_le16(mem_addr & 0xff);
+ cmd->eeprom_page = cpu_to_le16((u16)page << ICE_AQC_SFF_EEPROM_PAGE_S);
+ if (write)
+ cmd->i2c_bus_addr |= cpu_to_le16(ICE_AQC_SFF_IS_WRITE);
+
+ status = ice_aq_send_cmd(hw, &desc, data, length, cd);
+ return status;
+}
+
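+/* Usage sketch (hypothetical helper, for illustration only): read the
+ * first 16 identification bytes from a module EEPROM on logical port
+ * lport. The 0xA0 bus address and the zero page/set_page values follow
+ * the "typical" defaults noted in the kernel-doc above and are
+ * assumptions, not requirements.
+ */
+static int __maybe_unused
+ice_example_read_module_id(struct ice_hw *hw, u8 lport, u8 *buf)
+{
+ /* bit 8 marks the logical port number in bits [7:0] as valid */
+ u16 port = lport | BIT(8);
+
+ return ice_aq_sff_eeprom(hw, port, 0xA0, 0x00, 0, 0, buf, 16,
+ false, NULL);
+}
+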
+/**
+ * __ice_aq_get_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @params: RSS LUT parameters
+ * @set: set true to set the table, false to get the table
+ *
+ * Internal function to get (0x0B05) or set (0x0B03) RSS look up table
+ */
+static int
+__ice_aq_get_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *params, bool set)
+{
+ u16 flags = 0, vsi_id, lut_type, lut_size, glob_lut_idx, vsi_handle;
+ struct ice_aqc_get_set_rss_lut *cmd_resp;
+ struct ice_aq_desc desc;
+ int status;
+ u8 *lut;
+
+ if (!params)
+ return -EINVAL;
+
+ vsi_handle = params->vsi_handle;
+ lut = params->lut;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
+ return -EINVAL;
+
+ lut_size = params->lut_size;
+ lut_type = params->lut_type;
+ glob_lut_idx = params->global_lut_id;
+ vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+ cmd_resp = &desc.params.get_set_rss_lut;
+
+ if (set) {
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_lut);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ } else {
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_lut);
+ }
+
+ cmd_resp->vsi_id = cpu_to_le16(((vsi_id <<
+ ICE_AQC_GSET_RSS_LUT_VSI_ID_S) &
+ ICE_AQC_GSET_RSS_LUT_VSI_ID_M) |
+ ICE_AQC_GSET_RSS_LUT_VSI_VALID);
+
+ switch (lut_type) {
+ case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI:
+ case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF:
+ case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL:
+ flags |= ((lut_type << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S) &
+ ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M);
+ break;
+ default:
+ status = -EINVAL;
+ goto ice_aq_get_set_rss_lut_exit;
+ }
+
+ if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL) {
+ flags |= ((glob_lut_idx << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S) &
+ ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M);
+
+ if (!set)
+ goto ice_aq_get_set_rss_lut_send;
+ } else if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) {
+ if (!set)
+ goto ice_aq_get_set_rss_lut_send;
+ } else {
+ goto ice_aq_get_set_rss_lut_send;
+ }
+
+ /* LUT size is only valid for Global and PF table types */
+ switch (lut_size) {
+ case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128:
+ break;
+ case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512:
+ flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG <<
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+ break;
+ case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K:
+ if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) {
+ flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG <<
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+ ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+ break;
+ }
+ fallthrough;
+ default:
+ status = -EINVAL;
+ goto ice_aq_get_set_rss_lut_exit;
+ }
+
+ice_aq_get_set_rss_lut_send:
+ cmd_resp->flags = cpu_to_le16(flags);
+ status = ice_aq_send_cmd(hw, &desc, lut, lut_size, NULL);
+
+ice_aq_get_set_rss_lut_exit:
+ return status;
+}
+
+/**
+ * ice_aq_get_rss_lut
+ * @hw: pointer to the hardware structure
+ * @get_params: RSS LUT parameters used to specify which RSS LUT to get
+ *
+ * get the RSS lookup table, PF or VSI type
+ */
+int
+ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params)
+{
+ return __ice_aq_get_set_rss_lut(hw, get_params, false);
+}
+
+/**
+ * ice_aq_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @set_params: RSS LUT parameters used to specify how to set the RSS LUT
+ *
+ * set the RSS lookup table, PF or VSI type
+ */
+int
+ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params)
+{
+ return __ice_aq_get_set_rss_lut(hw, set_params, true);
+}
+
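+/* Usage sketch (hypothetical helper, for illustration only): program a
+ * 512-entry PF-type LUT. The lut buffer and vsi_handle come from the
+ * caller; the field names mirror those consumed by
+ * __ice_aq_get_set_rss_lut() above.
+ */
+static int __maybe_unused
+ice_example_write_pf_lut(struct ice_hw *hw, u16 vsi_handle, u8 *lut)
+{
+ struct ice_aq_get_set_rss_lut_params params = {
+ .vsi_handle = vsi_handle,
+ .lut = lut,
+ .lut_size = ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512,
+ .lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF,
+ .global_lut_id = 0, /* only used for the GLOBAL table type */
+ };
+
+ return ice_aq_set_rss_lut(hw, &params);
+}
+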
+/**
+ * __ice_aq_get_set_rss_key
+ * @hw: pointer to the HW struct
+ * @vsi_id: VSI FW index
+ * @key: pointer to key info struct
+ * @set: set true to set the key, false to get the key
+ *
+ * get (0x0B04) or set (0x0B02) the RSS key per VSI
+ */
+static int
+__ice_aq_get_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+ struct ice_aqc_get_set_rss_keys *key, bool set)
+{
+ struct ice_aqc_get_set_rss_key *cmd_resp;
+ u16 key_size = sizeof(*key);
+ struct ice_aq_desc desc;
+
+ cmd_resp = &desc.params.get_set_rss_key;
+
+ if (set) {
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_key);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ } else {
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_key);
+ }
+
+ cmd_resp->vsi_id = cpu_to_le16(((vsi_id <<
+ ICE_AQC_GSET_RSS_KEY_VSI_ID_S) &
+ ICE_AQC_GSET_RSS_KEY_VSI_ID_M) |
+ ICE_AQC_GSET_RSS_KEY_VSI_VALID);
+
+ return ice_aq_send_cmd(hw, &desc, key, key_size, NULL);
+}
+
+/**
+ * ice_aq_get_rss_key
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ * @key: pointer to key info struct
+ *
+ * get the RSS key per VSI
+ */
+int
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle,
+ struct ice_aqc_get_set_rss_keys *key)
+{
+ if (!ice_is_vsi_valid(hw, vsi_handle) || !key)
+ return -EINVAL;
+
+ return __ice_aq_get_set_rss_key(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+ key, false);
+}
+
+/**
+ * ice_aq_set_rss_key
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ * @keys: pointer to key info struct
+ *
+ * set the RSS key per VSI
+ */
+int
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle,
+ struct ice_aqc_get_set_rss_keys *keys)
+{
+ if (!ice_is_vsi_valid(hw, vsi_handle) || !keys)
+ return -EINVAL;
+
+ return __ice_aq_get_set_rss_key(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+ keys, true);
+}
+
+/**
+ * ice_aq_add_lan_txq
+ * @hw: pointer to the hardware structure
+ * @num_qgrps: Number of added queue groups
+ * @qg_list: list of queue groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add Tx LAN queue (0x0C30)
+ *
+ * NOTE:
+ * Prior to calling add Tx LAN queue, initialize the following as part of
+ * the Tx queue context: the completion queue ID (if the queue uses a
+ * completion queue), the quanta profile, the cache profile and the
+ * packet shaper profile.
+ *
+ * After the add Tx LAN queue AQ command completes, interrupts should be
+ * associated with the specific queues; associating a Tx queue with a
+ * doorbell queue is not part of the add LAN Tx queue flow.
+ */
+static int
+ice_aq_add_lan_txq(struct ice_hw *hw, u8 num_qgrps,
+ struct ice_aqc_add_tx_qgrp *qg_list, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_tx_qgrp *list;
+ struct ice_aqc_add_txqs *cmd;
+ struct ice_aq_desc desc;
+ u16 i, sum_size = 0;
+
+ cmd = &desc.params.add_txqs;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_txqs);
+
+ if (!qg_list)
+ return -EINVAL;
+
+ if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
+ return -EINVAL;
+
+ for (i = 0, list = qg_list; i < num_qgrps; i++) {
+ sum_size += struct_size(list, txqs, list->num_txqs);
+ list = (struct ice_aqc_add_tx_qgrp *)(list->txqs +
+ list->num_txqs);
+ }
+
+ if (buf_size != sum_size)
+ return -EINVAL;
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ cmd->num_qgrps = num_qgrps;
+
+ return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+}
+
+/**
+ * ice_aq_dis_lan_txq
+ * @hw: pointer to the hardware structure
+ * @num_qgrps: number of groups in the list
+ * @qg_list: the list of groups to disable
+ * @buf_size: the total size of the qg_list buffer in bytes
+ * @rst_src: if called due to reset, specifies the reset source
+ * @vmvf_num: the relative VM or VF number that is undergoing the reset
+ * @cd: pointer to command details structure or NULL
+ *
+ * Disable LAN Tx queue (0x0C31)
+ */
+static int
+ice_aq_dis_lan_txq(struct ice_hw *hw, u8 num_qgrps,
+ struct ice_aqc_dis_txq_item *qg_list, u16 buf_size,
+ enum ice_disq_rst_src rst_src, u16 vmvf_num,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_dis_txq_item *item;
+ struct ice_aqc_dis_txqs *cmd;
+ struct ice_aq_desc desc;
+ u16 i, sz = 0;
+ int status;
+
+ cmd = &desc.params.dis_txqs;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_dis_txqs);
+
+ /* qg_list can be NULL only in VM/VF reset flow */
+ if (!qg_list && !rst_src)
+ return -EINVAL;
+
+ if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
+ return -EINVAL;
+
+ cmd->num_entries = num_qgrps;
+
+ cmd->vmvf_and_timeout = cpu_to_le16((5 << ICE_AQC_Q_DIS_TIMEOUT_S) &
+ ICE_AQC_Q_DIS_TIMEOUT_M);
+
+ switch (rst_src) {
+ case ICE_VM_RESET:
+ cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VM_RESET;
+ cmd->vmvf_and_timeout |=
+ cpu_to_le16(vmvf_num & ICE_AQC_Q_DIS_VMVF_NUM_M);
+ break;
+ case ICE_VF_RESET:
+ cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VF_RESET;
+ /* In this case, FW expects vmvf_num to be absolute VF ID */
+ cmd->vmvf_and_timeout |=
+ cpu_to_le16((vmvf_num + hw->func_caps.vf_base_id) &
+ ICE_AQC_Q_DIS_VMVF_NUM_M);
+ break;
+ case ICE_NO_RESET:
+ default:
+ break;
+ }
+
+ /* flush pipe on timeout */
+ cmd->cmd_type |= ICE_AQC_Q_DIS_CMD_FLUSH_PIPE;
+ /* If no queue group info, we are in a reset flow. Issue the AQ */
+ if (!qg_list)
+ goto do_aq;
+
+ /* set RD bit to indicate that command buffer is provided by the driver
+ * and it needs to be read by the firmware
+ */
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ for (i = 0, item = qg_list; i < num_qgrps; i++) {
+ u16 item_size = struct_size(item, q_id, item->num_qs);
+
+ /* If the num of queues is even, add 2 bytes of padding */
+ if ((item->num_qs % 2) == 0)
+ item_size += 2;
+
+ sz += item_size;
+
+ item = (struct ice_aqc_dis_txq_item *)((u8 *)item + item_size);
+ }
+
+ if (buf_size != sz)
+ return -EINVAL;
+
+do_aq:
+ status = ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+ if (status) {
+ if (!qg_list)
+ ice_debug(hw, ICE_DBG_SCHED, "VM%d disable failed %d\n",
+ vmvf_num, hw->adminq.sq_last_status);
+ else
+ ice_debug(hw, ICE_DBG_SCHED, "disable queue %d failed %d\n",
+ le16_to_cpu(qg_list[0].q_id[0]),
+ hw->adminq.sq_last_status);
+ }
+ return status;
+}
+
+/**
+ * ice_aq_add_rdma_qsets
+ * @hw: pointer to the hardware structure
+ * @num_qset_grps: Number of RDMA Qset groups
+ * @qset_list: list of Qset groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add Tx RDMA Qsets (0x0C33)
+ */
+static int
+ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps,
+ struct ice_aqc_add_rdma_qset_data *qset_list,
+ u16 buf_size, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_rdma_qset_data *list;
+ struct ice_aqc_add_rdma_qset *cmd;
+ struct ice_aq_desc desc;
+ u16 i, sum_size = 0;
+
+ cmd = &desc.params.add_rdma_qset;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_rdma_qset);
+
+ if (num_qset_grps > ICE_LAN_TXQ_MAX_QGRPS)
+ return -EINVAL;
+
+ for (i = 0, list = qset_list; i < num_qset_grps; i++) {
+ u16 num_qsets = le16_to_cpu(list->num_qsets);
+
+ sum_size += struct_size(list, rdma_qsets, num_qsets);
+ list = (struct ice_aqc_add_rdma_qset_data *)(list->rdma_qsets +
+ num_qsets);
+ }
+
+ if (buf_size != sum_size)
+ return -EINVAL;
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ cmd->num_qset_grps = num_qset_grps;
+
+ return ice_aq_send_cmd(hw, &desc, qset_list, buf_size, cd);
+}
+
+/* End of FW Admin Queue command wrappers */
+
+/**
+ * ice_write_byte - write a byte to a packed context structure
+ * @src_ctx: the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info: a description of the struct to be filled
+ */
+static void
+ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+ u8 src_byte, dest_byte, mask;
+ u8 *from, *dest;
+ u16 shift_width;
+
+ /* copy from the next struct field */
+ from = src_ctx + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+ mask = (u8)(BIT(ce_info->width) - 1);
+
+ src_byte = *from;
+ src_byte &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_byte <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = dest_ctx + (ce_info->lsb / 8);
+
+ memcpy(&dest_byte, dest, sizeof(dest_byte));
+
+ dest_byte &= ~mask; /* get the bits not changing */
+ dest_byte |= src_byte; /* add in the new bits */
+
+ /* put it all back */
+ memcpy(dest, &dest_byte, sizeof(dest_byte));
+}
+
+/**
+ * ice_write_word - write a word to a packed context structure
+ * @src_ctx: the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info: a description of the struct to be filled
+ */
+static void
+ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+ u16 src_word, mask;
+ __le16 dest_word;
+ u8 *from, *dest;
+ u16 shift_width;
+
+ /* copy from the next struct field */
+ from = src_ctx + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+ mask = BIT(ce_info->width) - 1;
+
+ /* don't swizzle the bits until after the mask because the mask bits
+ * will be in a different bit position on big endian machines
+ */
+ src_word = *(u16 *)from;
+ src_word &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_word <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = dest_ctx + (ce_info->lsb / 8);
+
+ memcpy(&dest_word, dest, sizeof(dest_word));
+
+ dest_word &= ~(cpu_to_le16(mask)); /* get the bits not changing */
+ dest_word |= cpu_to_le16(src_word); /* add in the new bits */
+
+ /* put it all back */
+ memcpy(dest, &dest_word, sizeof(dest_word));
+}
+
+/**
+ * ice_write_dword - write a dword to a packed context structure
+ * @src_ctx: the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info: a description of the struct to be filled
+ */
+static void
+ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+ u32 src_dword, mask;
+ __le32 dest_dword;
+ u8 *from, *dest;
+ u16 shift_width;
+
+ /* copy from the next struct field */
+ from = src_ctx + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+
+ /* if the field width is exactly 32 on an x86 machine, then the shift
+ * operation will not work because the SHL instruction's shift count is
+ * masked to 5 bits, so the shift would do nothing
+ */
+ if (ce_info->width < 32)
+ mask = BIT(ce_info->width) - 1;
+ else
+ mask = (u32)~0;
+
+ /* don't swizzle the bits until after the mask because the mask bits
+ * will be in a different bit position on big endian machines
+ */
+ src_dword = *(u32 *)from;
+ src_dword &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_dword <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = dest_ctx + (ce_info->lsb / 8);
+
+ memcpy(&dest_dword, dest, sizeof(dest_dword));
+
+ dest_dword &= ~(cpu_to_le32(mask)); /* get the bits not changing */
+ dest_dword |= cpu_to_le32(src_dword); /* add in the new bits */
+
+ /* put it all back */
+ memcpy(dest, &dest_dword, sizeof(dest_dword));
+}
+
+/**
+ * ice_write_qword - write a qword to a packed context structure
+ * @src_ctx: the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info: a description of the struct to be filled
+ */
+static void
+ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+ u64 src_qword, mask;
+ __le64 dest_qword;
+ u8 *from, *dest;
+ u16 shift_width;
+
+ /* copy from the next struct field */
+ from = src_ctx + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+
+ /* if the field width is exactly 64 on an x86 machine, then the shift
+ * operation will not work because the SHL instruction's shift count is
+ * masked to 6 bits, so the shift would do nothing
+ */
+ if (ce_info->width < 64)
+ mask = BIT_ULL(ce_info->width) - 1;
+ else
+ mask = (u64)~0;
+
+ /* don't swizzle the bits until after the mask because the mask bits
+ * will be in a different bit position on big endian machines
+ */
+ src_qword = *(u64 *)from;
+ src_qword &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_qword <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = dest_ctx + (ce_info->lsb / 8);
+
+ memcpy(&dest_qword, dest, sizeof(dest_qword));
+
+ dest_qword &= ~(cpu_to_le64(mask)); /* get the bits not changing */
+ dest_qword |= cpu_to_le64(src_qword); /* add in the new bits */
+
+ /* put it all back */
+ memcpy(dest, &dest_qword, sizeof(dest_qword));
+}
+
+/**
+ * ice_set_ctx - set context bits in packed structure
+ * @hw: pointer to the hardware structure
+ * @src_ctx: pointer to a generic non-packed context structure
+ * @dest_ctx: pointer to memory for the packed structure
+ * @ce_info: a description of the structure to be transformed
+ */
+int
+ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info)
+{
+ int f;
+
+ for (f = 0; ce_info[f].width; f++) {
+ /* We have to deal with each element of the FW response
+ * using the correct size so that we are correct regardless
+ * of the endianness of the machine.
+ */
+ if (ce_info[f].width > (ce_info[f].size_of * BITS_PER_BYTE)) {
+ ice_debug(hw, ICE_DBG_QCTX, "Field %d width of %d bits larger than size of %d byte(s) ... skipping write\n",
+ f, ce_info[f].width, ce_info[f].size_of);
+ continue;
+ }
+ switch (ce_info[f].size_of) {
+ case sizeof(u8):
+ ice_write_byte(src_ctx, dest_ctx, &ce_info[f]);
+ break;
+ case sizeof(u16):
+ ice_write_word(src_ctx, dest_ctx, &ce_info[f]);
+ break;
+ case sizeof(u32):
+ ice_write_dword(src_ctx, dest_ctx, &ce_info[f]);
+ break;
+ case sizeof(u64):
+ ice_write_qword(src_ctx, dest_ctx, &ce_info[f]);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
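+/* Worked example (hypothetical, for illustration only): a hand-written
+ * two-field descriptor showing how the byte/word/dword/qword writers
+ * above pack an unpacked structure. The demo type and field layout are
+ * invented; the driver's real tables (e.g. ice_tlan_ctx_info) describe
+ * actual HW context layouts the same way.
+ */
+struct ice_demo_ctx {
+ u8 mode; /* 3-bit HW field */
+ u16 qlen; /* 13-bit HW field */
+};
+
+static const struct ice_ctx_ele ice_demo_ctx_info[] = {
+ /* mode lands in dest bits [2:0] via ice_write_byte() */
+ { .offset = offsetof(struct ice_demo_ctx, mode),
+ .size_of = sizeof(u8), .width = 3, .lsb = 0 },
+ /* qlen lands in dest bits [15:3] via ice_write_word() */
+ { .offset = offsetof(struct ice_demo_ctx, qlen),
+ .size_of = sizeof(u16), .width = 13, .lsb = 3 },
+ { 0 } /* a width of zero terminates the ice_set_ctx() loop */
+};
+
+static int __maybe_unused
+ice_example_pack_demo_ctx(struct ice_hw *hw, struct ice_demo_ctx *src,
+ u8 *dest)
+{
+ return ice_set_ctx(hw, (u8 *)src, dest, ice_demo_ctx_info);
+}
+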
+/**
+ * ice_get_lan_q_ctx - get the LAN queue context for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @q_handle: software queue handle
+ */
+struct ice_q_ctx *
+ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle)
+{
+ struct ice_vsi_ctx *vsi;
+ struct ice_q_ctx *q_ctx;
+
+ vsi = ice_get_vsi_ctx(hw, vsi_handle);
+ if (!vsi)
+ return NULL;
+ if (q_handle >= vsi->num_lan_q_entries[tc])
+ return NULL;
+ if (!vsi->lan_q_ctx[tc])
+ return NULL;
+ q_ctx = vsi->lan_q_ctx[tc];
+ return &q_ctx[q_handle];
+}
+
+/**
+ * ice_ena_vsi_txq
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @q_handle: software queue handle
+ * @num_qgrps: Number of added queue groups
+ * @buf: list of queue groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function adds one LAN queue
+ */
+int
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
+ u8 num_qgrps, struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_txsched_elem_data node = { 0 };
+ struct ice_sched_node *parent;
+ struct ice_q_ctx *q_ctx;
+ struct ice_hw *hw;
+ int status;
+
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return -EIO;
+
+ if (num_qgrps > 1 || buf->num_txqs > 1)
+ return -ENOSPC;
+
+ hw = pi->hw;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ mutex_lock(&pi->sched_lock);
+
+ q_ctx = ice_get_lan_q_ctx(hw, vsi_handle, tc, q_handle);
+ if (!q_ctx) {
+ ice_debug(hw, ICE_DBG_SCHED, "Enaq: invalid queue handle %d\n",
+ q_handle);
+ status = -EINVAL;
+ goto ena_txq_exit;
+ }
+
+ /* find a parent node */
+ parent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
+ ICE_SCHED_NODE_OWNER_LAN);
+ if (!parent) {
+ status = -EINVAL;
+ goto ena_txq_exit;
+ }
+
+ buf->parent_teid = parent->info.node_teid;
+ node.parent_teid = parent->info.node_teid;
+ /* Mark the values in the "generic" section as valid. The default
+ * value in the "generic" section is zero. This means that:
+ * - Scheduling mode is Bytes Per Second (BPS), indicated by Bit 0.
+ * - Priority among siblings is 0, indicated by Bits 1-3.
+ * - WFQ, indicated by Bit 4.
+ * - An adjustment value of 0 is used in the PSM credit update flow,
+ * indicated by Bits 5-6.
+ * - Bit 7 is reserved.
+ * Without setting the generic section as valid in valid_sections, the
+ * Admin queue command will fail with error code ICE_AQ_RC_EINVAL.
+ */
+ buf->txqs[0].info.valid_sections =
+ ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+ ICE_AQC_ELEM_VALID_EIR;
+ buf->txqs[0].info.generic = 0;
+ buf->txqs[0].info.cir_bw.bw_profile_idx =
+ cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+ buf->txqs[0].info.cir_bw.bw_alloc =
+ cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+ buf->txqs[0].info.eir_bw.bw_profile_idx =
+ cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+ buf->txqs[0].info.eir_bw.bw_alloc =
+ cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+
+ /* add the LAN queue */
+ status = ice_aq_add_lan_txq(hw, num_qgrps, buf, buf_size, cd);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SCHED, "enable queue %d failed %d\n",
+ le16_to_cpu(buf->txqs[0].txq_id),
+ hw->adminq.sq_last_status);
+ goto ena_txq_exit;
+ }
+
+ node.node_teid = buf->txqs[0].q_teid;
+ node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
+ q_ctx->q_handle = q_handle;
+ q_ctx->q_teid = le32_to_cpu(node.node_teid);
+
+ /* add a leaf node into scheduler tree queue layer */
+ status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node);
+ if (!status)
+ status = ice_sched_replay_q_bw(pi, q_ctx);
+
+ena_txq_exit:
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_dis_vsi_txq
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @num_queues: number of queues
+ * @q_handles: pointer to software queue handle array
+ * @q_ids: pointer to the q_id array
+ * @q_teids: pointer to queue node teids
+ * @rst_src: if called due to reset, specifies the reset source
+ * @vmvf_num: the relative VM or VF number that is undergoing the reset
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function removes queues and their corresponding nodes in SW DB
+ */
+int
+ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
+ u16 *q_handles, u16 *q_ids, u32 *q_teids,
+ enum ice_disq_rst_src rst_src, u16 vmvf_num,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_dis_txq_item *qg_list;
+ struct ice_q_ctx *q_ctx;
+ int status = -ENOENT;
+ struct ice_hw *hw;
+ u16 i, buf_size;
+
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return -EIO;
+
+ hw = pi->hw;
+
+ if (!num_queues) {
+ /* if the queue is already disabled but the disable queue command
+ * still has to be sent to complete the VF reset, call
+ * ice_aq_dis_lan_txq without any queue information
+ */
+ if (rst_src)
+ return ice_aq_dis_lan_txq(hw, 0, NULL, 0, rst_src,
+ vmvf_num, NULL);
+ return -EIO;
+ }
+
+ buf_size = struct_size(qg_list, q_id, 1);
+ qg_list = kzalloc(buf_size, GFP_KERNEL);
+ if (!qg_list)
+ return -ENOMEM;
+
+ mutex_lock(&pi->sched_lock);
+
+ for (i = 0; i < num_queues; i++) {
+ struct ice_sched_node *node;
+
+ node = ice_sched_find_node_by_teid(pi->root, q_teids[i]);
+ if (!node)
+ continue;
+ q_ctx = ice_get_lan_q_ctx(hw, vsi_handle, tc, q_handles[i]);
+ if (!q_ctx) {
+ ice_debug(hw, ICE_DBG_SCHED, "invalid queue handle%d\n",
+ q_handles[i]);
+ continue;
+ }
+ if (q_ctx->q_handle != q_handles[i]) {
+ ice_debug(hw, ICE_DBG_SCHED, "Err:handles %d %d\n",
+ q_ctx->q_handle, q_handles[i]);
+ continue;
+ }
+ qg_list->parent_teid = node->info.parent_teid;
+ qg_list->num_qs = 1;
+ qg_list->q_id[0] = cpu_to_le16(q_ids[i]);
+ status = ice_aq_dis_lan_txq(hw, 1, qg_list, buf_size, rst_src,
+ vmvf_num, cd);
+
+ if (status)
+ break;
+ ice_free_sched_node(pi, node);
+ q_ctx->q_handle = ICE_INVAL_Q_HANDLE;
+ }
+ mutex_unlock(&pi->sched_lock);
+ kfree(qg_list);
+ return status;
+}
+
+/**
+ * ice_cfg_vsi_qs - configure the new/existing VSI queues
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap
+ * @maxqs: max queues array per TC
+ * @owner: LAN or RDMA
+ *
+ * This function adds/updates the VSI queues per TC.
+ */
+static int
+ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
+ u16 *maxqs, u8 owner)
+{
+ int status = 0;
+ u8 i;
+
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return -EIO;
+
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ return -EINVAL;
+
+ mutex_lock(&pi->sched_lock);
+
+ ice_for_each_traffic_class(i) {
+ /* configuration is possible only if TC node is present */
+ if (!ice_sched_get_tc_node(pi, i))
+ continue;
+
+ status = ice_sched_cfg_vsi(pi, vsi_handle, i, maxqs[i], owner,
+ ice_is_tc_ena(tc_bitmap, i));
+ if (status)
+ break;
+ }
+
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_cfg_vsi_lan - configure VSI LAN queues
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap
+ * @max_lanqs: max LAN queues array per TC
+ *
+ * This function adds/updates the VSI LAN queues per TC.
+ */
+int
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
+ u16 *max_lanqs)
+{
+ return ice_cfg_vsi_qs(pi, vsi_handle, tc_bitmap, max_lanqs,
+ ICE_SCHED_NODE_OWNER_LAN);
+}
+
+/**
+ * ice_cfg_vsi_rdma - configure the VSI RDMA queues
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap
+ * @max_rdmaqs: max RDMA queues array per TC
+ *
+ * This function adds/updates the VSI RDMA queues per TC.
+ */
+int
+ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
+ u16 *max_rdmaqs)
+{
+ return ice_cfg_vsi_qs(pi, vsi_handle, tc_bitmap, max_rdmaqs,
+ ICE_SCHED_NODE_OWNER_RDMA);
+}
+
+/**
+ * ice_ena_vsi_rdma_qset
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @rdma_qset: pointer to RDMA Qset
+ * @num_qsets: number of RDMA Qsets
+ * @qset_teid: pointer to Qset node TEIDs
+ *
+ * This function adds RDMA Qsets
+ */
+int
+ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 *rdma_qset, u16 num_qsets, u32 *qset_teid)
+{
+ struct ice_aqc_txsched_elem_data node = { 0 };
+ struct ice_aqc_add_rdma_qset_data *buf;
+ struct ice_sched_node *parent;
+ struct ice_hw *hw;
+ u16 i, buf_size;
+ int ret;
+
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return -EIO;
+ hw = pi->hw;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ buf_size = struct_size(buf, rdma_qsets, num_qsets);
+ buf = kzalloc(buf_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ mutex_lock(&pi->sched_lock);
+
+ parent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
+ ICE_SCHED_NODE_OWNER_RDMA);
+ if (!parent) {
+ ret = -EINVAL;
+ goto rdma_error_exit;
+ }
+ buf->parent_teid = parent->info.node_teid;
+ node.parent_teid = parent->info.node_teid;
+
+ buf->num_qsets = cpu_to_le16(num_qsets);
+ for (i = 0; i < num_qsets; i++) {
+ buf->rdma_qsets[i].tx_qset_id = cpu_to_le16(rdma_qset[i]);
+ buf->rdma_qsets[i].info.valid_sections =
+ ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+ ICE_AQC_ELEM_VALID_EIR;
+ buf->rdma_qsets[i].info.generic = 0;
+ buf->rdma_qsets[i].info.cir_bw.bw_profile_idx =
+ cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+ buf->rdma_qsets[i].info.cir_bw.bw_alloc =
+ cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+ buf->rdma_qsets[i].info.eir_bw.bw_profile_idx =
+ cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+ buf->rdma_qsets[i].info.eir_bw.bw_alloc =
+ cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+ }
+ ret = ice_aq_add_rdma_qsets(hw, 1, buf, buf_size, NULL);
+ if (ret) {
+ ice_debug(hw, ICE_DBG_RDMA, "add RDMA qset failed\n");
+ goto rdma_error_exit;
+ }
+ node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
+ for (i = 0; i < num_qsets; i++) {
+ node.node_teid = buf->rdma_qsets[i].qset_teid;
+ ret = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1,
+ &node);
+ if (ret)
+ break;
+ qset_teid[i] = le32_to_cpu(node.node_teid);
+ }
+rdma_error_exit:
+ mutex_unlock(&pi->sched_lock);
+ kfree(buf);
+ return ret;
+}
+
+/**
+ * ice_dis_vsi_rdma_qset - free RDMA resources
+ * @pi: port_info struct
+ * @count: number of RDMA Qsets to free
+ * @qset_teid: pointer to Qset node TEIDs
+ * @q_id: list of queue IDs being disabled
+ */
+int
+ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
+ u16 *q_id)
+{
+ struct ice_aqc_dis_txq_item *qg_list;
+ struct ice_hw *hw;
+ int status = 0;
+ u16 qg_size;
+ int i;
+
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return -EIO;
+
+ hw = pi->hw;
+
+ qg_size = struct_size(qg_list, q_id, 1);
+ qg_list = kzalloc(qg_size, GFP_KERNEL);
+ if (!qg_list)
+ return -ENOMEM;
+
+ mutex_lock(&pi->sched_lock);
+
+ for (i = 0; i < count; i++) {
+ struct ice_sched_node *node;
+
+ node = ice_sched_find_node_by_teid(pi->root, qset_teid[i]);
+ if (!node)
+ continue;
+
+ qg_list->parent_teid = node->info.parent_teid;
+ qg_list->num_qs = 1;
+ qg_list->q_id[0] =
+ cpu_to_le16(q_id[i] |
+ ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET);
+
+ status = ice_aq_dis_lan_txq(hw, 1, qg_list, qg_size,
+ ICE_NO_RESET, 0, NULL);
+ if (status)
+ break;
+
+ ice_free_sched_node(pi, node);
+ }
+
+ mutex_unlock(&pi->sched_lock);
+ kfree(qg_list);
+ return status;
+}
+
+/**
+ * ice_replay_pre_init - replay pre initialization
+ * @hw: pointer to the HW struct
+ *
+ * Initializes required config data for VSI, FD, ACL, and RSS before replay.
+ */
+static int ice_replay_pre_init(struct ice_hw *hw)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ u8 i;
+
+ /* Delete old entries from replay filter list head if there is any */
+ ice_rm_all_sw_replay_rule_info(hw);
+ /* At the start of replay, move entries into the replay_rules list;
+ * this allows adding rule entries back to the filt_rules list,
+ * which is the operational list.
+ */
+ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++)
+ list_replace_init(&sw->recp_list[i].filt_rules,
+ &sw->recp_list[i].filt_replay_rules);
+ ice_sched_replay_agg_vsi_preinit(hw);
+
+ return 0;
+}
+
+/**
+ * ice_replay_vsi - replay VSI configuration
+ * @hw: pointer to the HW struct
+ * @vsi_handle: driver VSI handle
+ *
+ * Restore all VSI configuration after reset. It is required to call this
+ * function with the main VSI first.
+ */
+int ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle)
+{
+ int status;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ /* Replay pre-initialization if there is any */
+ if (vsi_handle == ICE_MAIN_VSI_HANDLE) {
+ status = ice_replay_pre_init(hw);
+ if (status)
+ return status;
+ }
+ /* Replay per VSI all RSS configurations */
+ status = ice_replay_rss_cfg(hw, vsi_handle);
+ if (status)
+ return status;
+ /* Replay per VSI all filters */
+ status = ice_replay_vsi_all_fltr(hw, vsi_handle);
+ if (!status)
+ status = ice_replay_vsi_agg(hw, vsi_handle);
+ return status;
+}
+
+/**
+ * ice_replay_post - post replay configuration cleanup
+ * @hw: pointer to the HW struct
+ *
+ * Post replay cleanup.
+ */
+void ice_replay_post(struct ice_hw *hw)
+{
+ /* Delete old entries from replay filter list head */
+ ice_rm_all_sw_replay_rule_info(hw);
+ ice_sched_replay_agg(hw);
+}
+
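+/* Usage sketch (hypothetical helper, for illustration only): the replay
+ * ordering required after a reset. The main VSI goes first (it triggers
+ * ice_replay_pre_init() above), then the remaining VSIs, then the
+ * post-replay cleanup. The vsi_handles/num_vsis walk is a stand-in for
+ * however the caller tracks its VSIs.
+ */
+static int __maybe_unused
+ice_example_replay_all(struct ice_hw *hw, u16 *vsi_handles, u16 num_vsis)
+{
+ int status;
+ u16 i;
+
+ status = ice_replay_vsi(hw, ICE_MAIN_VSI_HANDLE);
+ if (status)
+ return status;
+
+ for (i = 0; i < num_vsis; i++) {
+ if (vsi_handles[i] == ICE_MAIN_VSI_HANDLE)
+ continue;
+ status = ice_replay_vsi(hw, vsi_handles[i]);
+ if (status)
+ return status;
+ }
+
+ ice_replay_post(hw);
+ return 0;
+}
+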
+/**
+ * ice_stat_update40 - read 40 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @reg: offset of 64 bit HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+void
+ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+ u64 *prev_stat, u64 *cur_stat)
+{
+ u64 new_data = rd64(hw, reg) & (BIT_ULL(40) - 1);
+
+ /* device stats are not reset at PFR, so they likely will not be zeroed
+ * when the driver starts. Thus, save the value from the first read
+ * without adding to the statistic value so that we report stats which
+ * count up from zero.
+ */
+ if (!prev_stat_loaded) {
+ *prev_stat = new_data;
+ return;
+ }
+
+ /* Calculate the difference between the new and old values, and then
+ * add it to the software stat value.
+ */
+ if (new_data >= *prev_stat)
+ *cur_stat += new_data - *prev_stat;
+ else
+ /* to manage the potential roll-over */
+ *cur_stat += (new_data + BIT_ULL(40)) - *prev_stat;
+
+ /* Update the previously stored value to prepare for next read */
+ *prev_stat = new_data;
+}
+
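+/* Worked example (for illustration only): the roll-over branch above
+ * keeps deltas exact across a 40-bit wrap. With prev_stat = 0xFFFFFFFFF0
+ * and a post-wrap read of new_data = 0x10:
+ *
+ * (0x10 + BIT_ULL(40)) - 0xFFFFFFFFF0 = 0x20
+ *
+ * so 32 counts are added to *cur_stat, exactly as if no wrap occurred.
+ * A hypothetical caller primes the pair on the first read and
+ * accumulates on every later read:
+ *
+ * ice_stat_update40(hw, reg, loaded, &prev_val, &cur_val);
+ */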
+/**
+ * ice_stat_update32 - read 32 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @reg: offset of HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+void
+ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+ u64 *prev_stat, u64 *cur_stat)
+{
+ u32 new_data;
+
+ new_data = rd32(hw, reg);
+
+ /* device stats are not reset at PFR, so they likely will not be zeroed
+ * when the driver starts. Thus, save the value from the first read
+ * without adding to the statistic value so that we report stats which
+ * count up from zero.
+ */
+ if (!prev_stat_loaded) {
+ *prev_stat = new_data;
+ return;
+ }
+
+ /* Calculate the difference between the new and old values, and then
+ * add it to the software stat value.
+ */
+ if (new_data >= *prev_stat)
+ *cur_stat += new_data - *prev_stat;
+ else
+ /* to manage the potential roll-over */
+ *cur_stat += (new_data + BIT_ULL(32)) - *prev_stat;
+
+ /* Update the previously stored value to prepare for next read */
+ *prev_stat = new_data;
+}
+
+/**
+ * ice_sched_query_elem - query element information from HW
+ * @hw: pointer to the HW struct
+ * @node_teid: node TEID to be queried
+ * @buf: buffer to element information
+ *
+ * This function queries HW element information
+ */
+int
+ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
+ struct ice_aqc_txsched_elem_data *buf)
+{
+ u16 buf_size, num_elem_ret = 0;
+ int status;
+
+ buf_size = sizeof(*buf);
+ memset(buf, 0, buf_size);
+ buf->node_teid = cpu_to_le32(node_teid);
+ status = ice_aq_query_sched_elems(hw, 1, buf, buf_size, &num_elem_ret,
+ NULL);
+ if (status || num_elem_ret != 1)
+ ice_debug(hw, ICE_DBG_SCHED, "query element failed\n");
+ return status;
+}
+
+/**
+ * ice_aq_read_i2c
+ * @hw: pointer to the hw struct
+ * @topo_addr: topology address for a device to communicate with
+ * @bus_addr: 7-bit I2C bus address
+ * @addr: I2C memory address (I2C offset) with up to 16 bits
+ * @params: I2C parameters: bit [7] - Repeated start,
+ * bits [6:5] - data offset size,
+ * bit [4] - I2C address type,
+ * bits [3:0] - data size to read (0-16 bytes)
+ * @data: pointer to data (0 to 16 bytes) to be read from the I2C device
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read I2C (0x06E2)
+ */
+int
+ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
+ u16 bus_addr, __le16 addr, u8 params, u8 *data,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc = { 0 };
+ struct ice_aqc_i2c *cmd;
+ u8 data_size;
+ int status;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_read_i2c);
+ cmd = &desc.params.read_write_i2c;
+
+ if (!data)
+ return -EINVAL;
+
+ data_size = FIELD_GET(ICE_AQC_I2C_DATA_SIZE_M, params);
+
+ cmd->i2c_bus_addr = cpu_to_le16(bus_addr);
+ cmd->topo_addr = topo_addr;
+ cmd->i2c_params = params;
+ cmd->i2c_addr = addr;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+ if (!status) {
+ struct ice_aqc_read_i2c_resp *resp;
+ u8 i;
+
+ resp = &desc.params.read_i2c_resp;
+ for (i = 0; i < data_size; i++) {
+ *data = resp->i2c_data[i];
+ data++;
+ }
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_write_i2c
+ * @hw: pointer to the hw struct
+ * @topo_addr: topology address for a device to communicate with
+ * @bus_addr: 7-bit I2C bus address
+ * @addr: I2C memory address (I2C offset) with up to 16 bits
+ * @params: I2C parameters: bit [4] - I2C address type, bits [3:0] - data size to write (0-4 bytes)
+ * @data: pointer to data (0 to 4 bytes) to be written to the I2C device
+ * @cd: pointer to command details structure or NULL
+ *
+ * Write I2C (0x06E3)
+ *
+ * Return:
+ * * 0 - Successful write to the i2c device
+ * * -EINVAL - Data size greater than 4 bytes
+ * * -EIO - FW error
+ */
+int
+ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
+ u16 bus_addr, __le16 addr, u8 params, u8 *data,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc = { 0 };
+ struct ice_aqc_i2c *cmd;
+ u8 data_size;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_write_i2c);
+ cmd = &desc.params.read_write_i2c;
+
+ data_size = FIELD_GET(ICE_AQC_I2C_DATA_SIZE_M, params);
+
+ /* data_size limited to 4 */
+ if (data_size > 4)
+ return -EINVAL;
+
+ cmd->i2c_bus_addr = cpu_to_le16(bus_addr);
+ cmd->topo_addr = topo_addr;
+ cmd->i2c_params = params;
+ cmd->i2c_addr = addr;
+
+ memcpy(cmd->i2c_data, data, data_size);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
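+/* Usage sketch (hypothetical helper, for illustration only): compose the
+ * params byte from the documented bit layout, then do a 4-byte read
+ * followed by a 1-byte write. Only the data-size field is populated;
+ * repeated start, offset size and address type stay at their zero
+ * defaults, and the 0x50 bus address is an arbitrary example value.
+ */
+static int __maybe_unused
+ice_example_i2c_xfer(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo)
+{
+ u8 rd_buf[4], wr_byte = 0xA5;
+ u8 params;
+ int err;
+
+ params = FIELD_PREP(ICE_AQC_I2C_DATA_SIZE_M, sizeof(rd_buf));
+ err = ice_aq_read_i2c(hw, topo, 0x50, cpu_to_le16(0), params,
+ rd_buf, NULL);
+ if (err)
+ return err;
+
+ params = FIELD_PREP(ICE_AQC_I2C_DATA_SIZE_M, sizeof(wr_byte));
+ return ice_aq_write_i2c(hw, topo, 0x50, cpu_to_le16(0), params,
+ &wr_byte, NULL);
+}
+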
+/**
+ * ice_aq_set_driver_param - Set driver parameter to share via firmware
+ * @hw: pointer to the HW struct
+ * @idx: parameter index to set
+ * @value: the value to set the parameter to
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the value of one of the software defined parameters. All PFs connected
+ * to this device can read the value using ice_aq_get_driver_param.
+ *
+ * Note that firmware provides no synchronization or locking, and will not
+ * save the parameter value during a device reset. It is expected that
+ * a single PF will write the parameter value, while all other PFs will only
+ * read it.
+ */
+int
+ice_aq_set_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+ u32 value, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_driver_shared_params *cmd;
+ struct ice_aq_desc desc;
+
+ if (idx >= ICE_AQC_DRIVER_PARAM_MAX)
+ return -EIO;
+
+ cmd = &desc.params.drv_shared_params;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_shared_params);
+
+ cmd->set_or_get_op = ICE_AQC_DRIVER_PARAM_SET;
+ cmd->param_indx = idx;
+ cmd->param_val = cpu_to_le32(value);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_get_driver_param - Get driver parameter shared via firmware
+ * @hw: pointer to the HW struct
+ * @idx: parameter index to get
+ * @value: storage to return the shared parameter
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the value of one of the software defined parameters.
+ *
+ * Note that firmware provides no synchronization or locking. It is expected
+ * that only a single PF will write a given parameter.
+ */
+int
+ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+ u32 *value, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_driver_shared_params *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ if (idx >= ICE_AQC_DRIVER_PARAM_MAX)
+ return -EIO;
+
+ cmd = &desc.params.drv_shared_params;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_shared_params);
+
+ cmd->set_or_get_op = ICE_AQC_DRIVER_PARAM_GET;
+ cmd->param_indx = idx;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+ if (status)
+ return status;
+
+ *value = le32_to_cpu(cmd->param_val);
+
+ return 0;
+}
+
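+/* Usage sketch (hypothetical helper, for illustration only): one PF
+ * publishes a value and any PF can read it back. idx is whichever
+ * enum ice_aqc_driver_params slot the participating PFs agreed on, and
+ * 0xCAFE is an arbitrary example value. Per the notes above, FW offers
+ * no locking and the value does not survive a device reset.
+ */
+static int __maybe_unused
+ice_example_share_param(struct ice_hw *hw, enum ice_aqc_driver_params idx)
+{
+ u32 value;
+ int err;
+
+ err = ice_aq_set_driver_param(hw, idx, 0xCAFE, NULL);
+ if (err)
+ return err;
+
+ return ice_aq_get_driver_param(hw, idx, &value, NULL);
+}
+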
+/**
+ * ice_aq_set_gpio
+ * @hw: pointer to the hw struct
+ * @gpio_ctrl_handle: GPIO controller node handle
+ * @pin_idx: IO number of the GPIO that needs to be set
+ * @value: SW-provided IO value to set in the LSB
+ * @cd: pointer to command details structure or NULL
+ *
+ * Sends 0x06EC AQ command to set the state of a GPIO pin that's part of
+ * the topology
+ */
+int
+ice_aq_set_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool value,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_gpio *cmd;
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_gpio);
+ cmd = &desc.params.read_write_gpio;
+ cmd->gpio_ctrl_handle = cpu_to_le16(gpio_ctrl_handle);
+ cmd->gpio_num = pin_idx;
+ cmd->gpio_val = value ? 1 : 0;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_get_gpio
+ * @hw: pointer to the hw struct
+ * @gpio_ctrl_handle: GPIO controller node handle
+ * @pin_idx: IO number of the GPIO that needs to be read
+ * @value: IO value read
+ * @cd: pointer to command details structure or NULL
+ *
+ * Sends 0x06ED AQ command to get the value of a GPIO signal which is part of
+ * the topology
+ */
+int
+ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx,
+ bool *value, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_gpio *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_gpio);
+ cmd = &desc.params.read_write_gpio;
+ cmd->gpio_ctrl_handle = cpu_to_le16(gpio_ctrl_handle);
+ cmd->gpio_num = pin_idx;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+ if (status)
+ return status;
+
+ *value = !!cmd->gpio_val;
+ return 0;
+}
+
+/**
+ * ice_is_fw_api_min_ver
+ * @hw: pointer to the hardware structure
+ * @maj: major version
+ * @min: minor version
+ * @patch: patch version
+ *
+ * Checks if the firmware API version is at least the given maj.min.patch version
+ */
+static bool ice_is_fw_api_min_ver(struct ice_hw *hw, u8 maj, u8 min, u8 patch)
+{
+ if (hw->api_maj_ver == maj) {
+ if (hw->api_min_ver > min)
+ return true;
+ if (hw->api_min_ver == min && hw->api_patch >= patch)
+ return true;
+ } else if (hw->api_maj_ver > maj) {
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_fw_supports_link_override
+ * @hw: pointer to the hardware structure
+ *
+ * Checks if the firmware supports link override
+ */
+bool ice_fw_supports_link_override(struct ice_hw *hw)
+{
+ return ice_is_fw_api_min_ver(hw, ICE_FW_API_LINK_OVERRIDE_MAJ,
+ ICE_FW_API_LINK_OVERRIDE_MIN,
+ ICE_FW_API_LINK_OVERRIDE_PATCH);
+}
+
+/**
+ * ice_get_link_default_override
+ * @ldo: pointer to the link default override struct
+ * @pi: pointer to the port info struct
+ *
+ * Gets the link default override for a port
+ */
+int
+ice_get_link_default_override(struct ice_link_default_override_tlv *ldo,
+ struct ice_port_info *pi)
+{
+ u16 i, tlv, tlv_len, tlv_start, buf, offset;
+ struct ice_hw *hw = pi->hw;
+ int status;
+
+ status = ice_get_pfa_module_tlv(hw, &tlv, &tlv_len,
+ ICE_SR_LINK_DEFAULT_OVERRIDE_PTR);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read link override TLV.\n");
+ return status;
+ }
+
+ /* Each port has its own config; calculate for our port */
+ tlv_start = tlv + pi->lport * ICE_SR_PFA_LINK_OVERRIDE_WORDS +
+ ICE_SR_PFA_LINK_OVERRIDE_OFFSET;
+
+ /* link options first */
+ status = ice_read_sr_word(hw, tlv_start, &buf);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read override link options.\n");
+ return status;
+ }
+ ldo->options = buf & ICE_LINK_OVERRIDE_OPT_M;
+ ldo->phy_config = (buf & ICE_LINK_OVERRIDE_PHY_CFG_M) >>
+ ICE_LINK_OVERRIDE_PHY_CFG_S;
+
+ /* link PHY config */
+ offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_FEC_OFFSET;
+ status = ice_read_sr_word(hw, offset, &buf);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read override phy config.\n");
+ return status;
+ }
+ ldo->fec_options = buf & ICE_LINK_OVERRIDE_FEC_OPT_M;
+
+ /* PHY types low */
+ offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET;
+ for (i = 0; i < ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS; i++) {
+ status = ice_read_sr_word(hw, (offset + i), &buf);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read override link options.\n");
+ return status;
+ }
+ /* shift 16 bits at a time to fill 64 bits */
+ ldo->phy_type_low |= ((u64)buf << (i * 16));
+ }
+
+ /* PHY types high */
+ offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET +
+ ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS;
+ for (i = 0; i < ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS; i++) {
+ status = ice_read_sr_word(hw, (offset + i), &buf);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read override link options.\n");
+ return status;
+ }
+ /* shift 16 bits at a time to fill 64 bits */
+ ldo->phy_type_high |= ((u64)buf << (i * 16));
+ }
+
+ return status;
+}
+
+/**
+ * ice_is_phy_caps_an_enabled - check if PHY capabilities autoneg is enabled
+ * @caps: get PHY capability data
+ */
+bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps)
+{
+ if (caps->caps & ICE_AQC_PHY_AN_MODE ||
+ caps->low_power_ctrl_an & (ICE_AQC_PHY_AN_EN_CLAUSE28 |
+ ICE_AQC_PHY_AN_EN_CLAUSE73 |
+ ICE_AQC_PHY_AN_EN_CLAUSE37))
+ return true;
+
+ return false;
+}
+
+/**
+ * ice_aq_set_lldp_mib - Set the LLDP MIB
+ * @hw: pointer to the HW struct
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buf: pointer to the caller-supplied buffer to store the MIB block
+ * @buf_size: size of the buffer (in bytes)
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the LLDP MIB. (0x0A08)
+ */
+int
+ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_lldp_set_local_mib *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.lldp_set_mib;
+
+ if (buf_size == 0 || !buf)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_set_local_mib);
+
+ desc.flags |= cpu_to_le16((u16)ICE_AQ_FLAG_RD);
+ desc.datalen = cpu_to_le16(buf_size);
+
+ cmd->type = mib_type;
+ cmd->length = cpu_to_le16(buf_size);
+
+ return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_fw_supports_lldp_fltr_ctrl - check if FW API version supports lldp_fltr_ctrl
+ * @hw: pointer to HW struct
+ */
+bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw)
+{
+ if (hw->mac_type != ICE_MAC_E810)
+ return false;
+
+ return ice_is_fw_api_min_ver(hw, ICE_FW_API_LLDP_FLTR_MAJ,
+ ICE_FW_API_LLDP_FLTR_MIN,
+ ICE_FW_API_LLDP_FLTR_PATCH);
+}
+
+/**
+ * ice_lldp_fltr_add_remove - add or remove a LLDP Rx switch filter
+ * @hw: pointer to HW struct
+ * @vsi_num: absolute HW index for VSI
+ * @add: true to add a filter, false to remove it
+ */
+int
+ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add)
+{
+ struct ice_aqc_lldp_filter_ctrl *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.lldp_filter_ctrl;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_filter_ctrl);
+
+ if (add)
+ cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_ADD;
+ else
+ cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_DELETE;
+
+ cmd->vsi_num = cpu_to_le16(vsi_num);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
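+/* Usage sketch (hypothetical helper, for illustration only): gate the
+ * filter-control AQ behind the FW capability check above before steering
+ * LLDP frames to a VSI.
+ */
+static int __maybe_unused
+ice_example_add_lldp_fltr(struct ice_hw *hw, u16 vsi_num)
+{
+ if (!ice_fw_supports_lldp_fltr_ctrl(hw))
+ return -EOPNOTSUPP;
+
+ return ice_lldp_fltr_add_remove(hw, vsi_num, true);
+}
+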
+/**
+ * ice_fw_supports_report_dflt_cfg
+ * @hw: pointer to the hardware structure
+ *
+ * Checks if the firmware supports report default configuration
+ */
+bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw)
+{
+ return ice_is_fw_api_min_ver(hw, ICE_FW_API_REPORT_DFLT_CFG_MAJ,
+ ICE_FW_API_REPORT_DFLT_CFG_MIN,
+ ICE_FW_API_REPORT_DFLT_CFG_PATCH);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
new file mode 100644
index 000000000..8b6712b92
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_COMMON_H_
+#define _ICE_COMMON_H_
+
+#include <linux/bitfield.h>
+
+#include "ice_type.h"
+#include "ice_nvm.h"
+#include "ice_flex_pipe.h"
+#include <linux/avf/virtchnl.h>
+#include "ice_switch.h"
+#include "ice_fdir.h"
+
+#define ICE_SQ_SEND_DELAY_TIME_MS 10
+#define ICE_SQ_SEND_MAX_EXECUTE 3
+
+int ice_init_hw(struct ice_hw *hw);
+void ice_deinit_hw(struct ice_hw *hw);
+int ice_check_reset(struct ice_hw *hw);
+int ice_reset(struct ice_hw *hw, enum ice_reset_req req);
+int ice_create_all_ctrlq(struct ice_hw *hw);
+int ice_init_all_ctrlq(struct ice_hw *hw);
+void ice_shutdown_all_ctrlq(struct ice_hw *hw);
+void ice_destroy_all_ctrlq(struct ice_hw *hw);
+int
+ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+ struct ice_rq_event_info *e, u16 *pending);
+int
+ice_get_link_status(struct ice_port_info *pi, bool *link_up);
+int ice_update_link_info(struct ice_port_info *pi);
+int
+ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+ enum ice_aq_res_access_type access, u32 timeout);
+void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
+int
+ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res);
+int
+ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res);
+int
+ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries,
+ struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size,
+ enum ice_adminq_opc opc, struct ice_sq_cd *cd);
+bool ice_is_sbq_supported(struct ice_hw *hw);
+struct ice_ctl_q_info *ice_get_sbq(struct ice_hw *hw);
+int
+ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+ struct ice_aq_desc *desc, void *buf, u16 buf_size,
+ struct ice_sq_cd *cd);
+void ice_clear_pxe_mode(struct ice_hw *hw);
+int ice_get_caps(struct ice_hw *hw);
+
+void ice_set_safe_mode_caps(struct ice_hw *hw);
+
+int
+ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+ u32 rxq_index);
+
+int
+ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params);
+int
+ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params);
+int
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle,
+ struct ice_aqc_get_set_rss_keys *keys);
+int
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle,
+ struct ice_aqc_get_set_rss_keys *keys);
+
+bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
+int ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
+void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
+extern const struct ice_ctx_ele ice_tlan_ctx_info[];
+int
+ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
+ const struct ice_ctx_ele *ce_info);
+
+extern struct mutex ice_global_cfg_lock_sw;
+
+int
+ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
+ void *buf, u16 buf_size, struct ice_sq_cd *cd);
+int ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
+
+int
+ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv,
+ struct ice_sq_cd *cd);
+int
+ice_aq_set_port_params(struct ice_port_info *pi, bool double_vlan,
+ struct ice_sq_cd *cd);
+int
+ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
+ struct ice_aqc_get_phy_caps_data *caps,
+ struct ice_sq_cd *cd);
+int
+ice_aq_list_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
+ enum ice_adminq_opc opc, struct ice_sq_cd *cd);
+int
+ice_discover_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_caps);
+void
+ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
+ u16 link_speeds_bitmap);
+int
+ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
+ struct ice_sq_cd *cd);
+bool ice_is_e810(struct ice_hw *hw);
+int ice_clear_pf_cfg(struct ice_hw *hw);
+int
+ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi,
+ struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd);
+bool ice_fw_supports_link_override(struct ice_hw *hw);
+int
+ice_get_link_default_override(struct ice_link_default_override_tlv *ldo,
+ struct ice_port_info *pi);
+bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps);
+
+enum ice_fc_mode ice_caps_to_fc_mode(u8 caps);
+enum ice_fec_mode ice_caps_to_fec_mode(u8 caps, u8 fec_options);
+int
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures,
+ bool ena_auto_link_update);
+int
+ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+ enum ice_fc_mode fc);
+bool
+ice_phy_caps_equals_cfg(struct ice_aqc_get_phy_caps_data *caps,
+ struct ice_aqc_set_phy_cfg_data *cfg);
+void
+ice_copy_phy_caps_to_cfg(struct ice_port_info *pi,
+ struct ice_aqc_get_phy_caps_data *caps,
+ struct ice_aqc_set_phy_cfg_data *cfg);
+int
+ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+ enum ice_fec_mode fec);
+int
+ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
+ struct ice_sq_cd *cd);
+int
+ice_aq_set_mac_cfg(struct ice_hw *hw, u16 max_frame_size, struct ice_sq_cd *cd);
+int
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+ struct ice_link_status *link, struct ice_sq_cd *cd);
+int
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+ struct ice_sq_cd *cd);
+int
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd);
+
+int
+ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
+ struct ice_sq_cd *cd);
+int
+ice_aq_get_port_options(struct ice_hw *hw,
+ struct ice_aqc_get_port_options_elem *options,
+ u8 *option_count, u8 lport, bool lport_valid,
+ u8 *active_option_idx, bool *active_option_valid,
+ u8 *pending_option_idx, bool *pending_option_valid);
+int
+ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid,
+ u8 new_option);
+int
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+ u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+ bool write, struct ice_sq_cd *cd);
+
+int
+ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
+ u16 *max_rdmaqs);
+int
+ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 *rdma_qset, u16 num_qsets, u32 *qset_teid);
+int
+ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
+ u16 *q_id);
+int
+ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
+ u16 *q_handle, u16 *q_ids, u32 *q_teids,
+ enum ice_disq_rst_src rst_src, u16 vmvf_num,
+ struct ice_sq_cd *cd);
+int
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
+ u16 *max_lanqs);
+int
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
+ u8 num_qgrps, struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+ struct ice_sq_cd *cd);
+int ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle);
+void ice_replay_post(struct ice_hw *hw);
+void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf);
+struct ice_q_ctx *
+ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle);
+int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in);
+void
+ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+ u64 *prev_stat, u64 *cur_stat);
+void
+ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+ u64 *prev_stat, u64 *cur_stat);
+bool ice_is_e810t(struct ice_hw *hw);
+int
+ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
+ struct ice_aqc_txsched_elem_data *buf);
+int
+ice_aq_set_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+ u32 value, struct ice_sq_cd *cd);
+int
+ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+ u32 *value, struct ice_sq_cd *cd);
+int
+ice_aq_set_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool value,
+ struct ice_sq_cd *cd);
+int
+ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx,
+ bool *value, struct ice_sq_cd *cd);
+bool ice_is_100m_speed_supported(struct ice_hw *hw);
+int
+ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
+ struct ice_sq_cd *cd);
+bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw);
+int
+ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add);
+int
+ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
+ u16 bus_addr, __le16 addr, u8 params, u8 *data,
+ struct ice_sq_cd *cd);
+int
+ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
+ u16 bus_addr, __le16 addr, u8 params, u8 *data,
+ struct ice_sq_cd *cd);
+bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw);
+#endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
new file mode 100644
index 000000000..6bcfee295
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -0,0 +1,1242 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+
+#define ICE_CQ_INIT_REGS(qinfo, prefix) \
+do { \
+ (qinfo)->sq.head = prefix##_ATQH; \
+ (qinfo)->sq.tail = prefix##_ATQT; \
+ (qinfo)->sq.len = prefix##_ATQLEN; \
+ (qinfo)->sq.bah = prefix##_ATQBAH; \
+ (qinfo)->sq.bal = prefix##_ATQBAL; \
+ (qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M; \
+ (qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M; \
+ (qinfo)->sq.len_crit_mask = prefix##_ATQLEN_ATQCRIT_M; \
+ (qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M; \
+ (qinfo)->rq.head = prefix##_ARQH; \
+ (qinfo)->rq.tail = prefix##_ARQT; \
+ (qinfo)->rq.len = prefix##_ARQLEN; \
+ (qinfo)->rq.bah = prefix##_ARQBAH; \
+ (qinfo)->rq.bal = prefix##_ARQBAL; \
+ (qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M; \
+ (qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M; \
+ (qinfo)->rq.len_crit_mask = prefix##_ARQLEN_ARQCRIT_M; \
+ (qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M; \
+} while (0)
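+
+/* Illustrative expansion (for reference only, not generated code): the
+ * AdminQ invocation ICE_CQ_INIT_REGS(cq, PF_FW) pastes the prefix into
+ * the PF firmware AdminQ register names, so the first assignments become:
+ *
+ *	(cq)->sq.head = PF_FW_ATQH;
+ *	(cq)->sq.tail = PF_FW_ATQT;
+ *
+ * and likewise for the remaining sq/rq fields.
+ */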
+
+/**
+ * ice_adminq_init_regs - Initialize AdminQ registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_adminq_init_regs(struct ice_hw *hw)
+{
+ struct ice_ctl_q_info *cq = &hw->adminq;
+
+ ICE_CQ_INIT_REGS(cq, PF_FW);
+}
+
+/**
+ * ice_mailbox_init_regs - Initialize Mailbox registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_mailbox_init_regs(struct ice_hw *hw)
+{
+ struct ice_ctl_q_info *cq = &hw->mailboxq;
+
+ ICE_CQ_INIT_REGS(cq, PF_MBX);
+}
+
+/**
+ * ice_sb_init_regs - Initialize Sideband registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_sb_init_regs(struct ice_hw *hw)
+{
+ struct ice_ctl_q_info *cq = &hw->sbq;
+
+ ICE_CQ_INIT_REGS(cq, PF_SB);
+}
+
+/**
+ * ice_check_sq_alive
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns true if Queue is enabled else false.
+ */
+bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ /* check both queue-length and queue-enable fields */
+ if (cq->sq.len && cq->sq.len_mask && cq->sq.len_ena_mask)
+ return (rd32(hw, cq->sq.len) & (cq->sq.len_mask |
+ cq->sq.len_ena_mask)) ==
+ (cq->num_sq_entries | cq->sq.len_ena_mask);
+
+ return false;
+}
+
+/**
+ * ice_alloc_ctrlq_sq_ring - Allocate Control Transmit Queue (ATQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static int
+ice_alloc_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ size_t size = cq->num_sq_entries * sizeof(struct ice_aq_desc);
+
+ cq->sq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
+ &cq->sq.desc_buf.pa,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!cq->sq.desc_buf.va)
+ return -ENOMEM;
+ cq->sq.desc_buf.size = size;
+
+ cq->sq.cmd_buf = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
+ sizeof(struct ice_sq_cd), GFP_KERNEL);
+ if (!cq->sq.cmd_buf) {
+ dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size,
+ cq->sq.desc_buf.va, cq->sq.desc_buf.pa);
+ cq->sq.desc_buf.va = NULL;
+ cq->sq.desc_buf.pa = 0;
+ cq->sq.desc_buf.size = 0;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_alloc_ctrlq_rq_ring - Allocate Control Receive Queue (ARQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static int
+ice_alloc_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ size_t size = cq->num_rq_entries * sizeof(struct ice_aq_desc);
+
+ cq->rq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
+ &cq->rq.desc_buf.pa,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!cq->rq.desc_buf.va)
+ return -ENOMEM;
+ cq->rq.desc_buf.size = size;
+ return 0;
+}
+
+/**
+ * ice_free_cq_ring - Free control queue ring
+ * @hw: pointer to the hardware structure
+ * @ring: pointer to the specific control queue ring
+ *
+ * This assumes the posted buffers have already been cleaned
+ * and de-allocated
+ */
+static void ice_free_cq_ring(struct ice_hw *hw, struct ice_ctl_q_ring *ring)
+{
+ dmam_free_coherent(ice_hw_to_dev(hw), ring->desc_buf.size,
+ ring->desc_buf.va, ring->desc_buf.pa);
+ ring->desc_buf.va = NULL;
+ ring->desc_buf.pa = 0;
+ ring->desc_buf.size = 0;
+}
+
+/**
+ * ice_alloc_rq_bufs - Allocate pre-posted buffers for the ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static int
+ice_alloc_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int i;
+
+ /* We'll be allocating the buffer info memory first, then we can
+ * allocate the mapped buffers for the event processing
+ */
+ cq->rq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_rq_entries,
+ sizeof(cq->rq.desc_buf), GFP_KERNEL);
+ if (!cq->rq.dma_head)
+ return -ENOMEM;
+ cq->rq.r.rq_bi = (struct ice_dma_mem *)cq->rq.dma_head;
+
+ /* allocate the mapped buffers */
+ for (i = 0; i < cq->num_rq_entries; i++) {
+ struct ice_aq_desc *desc;
+ struct ice_dma_mem *bi;
+
+ bi = &cq->rq.r.rq_bi[i];
+ bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
+ cq->rq_buf_size, &bi->pa,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!bi->va)
+ goto unwind_alloc_rq_bufs;
+ bi->size = cq->rq_buf_size;
+
+ /* now configure the descriptors for use */
+ desc = ICE_CTL_Q_DESC(cq->rq, i);
+
+ desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
+ if (cq->rq_buf_size > ICE_AQ_LG_BUF)
+ desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+ desc->opcode = 0;
+		/* This is in accordance with the Admin queue design; there is
+		 * no register for buffer size configuration
+		 */
+ desc->datalen = cpu_to_le16(bi->size);
+ desc->retval = 0;
+ desc->cookie_high = 0;
+ desc->cookie_low = 0;
+ desc->params.generic.addr_high =
+ cpu_to_le32(upper_32_bits(bi->pa));
+ desc->params.generic.addr_low =
+ cpu_to_le32(lower_32_bits(bi->pa));
+ desc->params.generic.param0 = 0;
+ desc->params.generic.param1 = 0;
+ }
+ return 0;
+
+unwind_alloc_rq_bufs:
+ /* don't try to free the one that failed... */
+ i--;
+ for (; i >= 0; i--) {
+ dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size,
+ cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa);
+ cq->rq.r.rq_bi[i].va = NULL;
+ cq->rq.r.rq_bi[i].pa = 0;
+ cq->rq.r.rq_bi[i].size = 0;
+ }
+ cq->rq.r.rq_bi = NULL;
+ devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
+ cq->rq.dma_head = NULL;
+
+ return -ENOMEM;
+}
+
+/**
+ * ice_alloc_sq_bufs - Allocate empty buffer structs for the ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static int
+ice_alloc_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int i;
+
+ /* No mapped memory needed yet, just the buffer info structures */
+ cq->sq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
+ sizeof(cq->sq.desc_buf), GFP_KERNEL);
+ if (!cq->sq.dma_head)
+ return -ENOMEM;
+ cq->sq.r.sq_bi = (struct ice_dma_mem *)cq->sq.dma_head;
+
+ /* allocate the mapped buffers */
+ for (i = 0; i < cq->num_sq_entries; i++) {
+ struct ice_dma_mem *bi;
+
+ bi = &cq->sq.r.sq_bi[i];
+ bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
+ cq->sq_buf_size, &bi->pa,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!bi->va)
+ goto unwind_alloc_sq_bufs;
+ bi->size = cq->sq_buf_size;
+ }
+ return 0;
+
+unwind_alloc_sq_bufs:
+ /* don't try to free the one that failed... */
+ i--;
+ for (; i >= 0; i--) {
+ dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.r.sq_bi[i].size,
+ cq->sq.r.sq_bi[i].va, cq->sq.r.sq_bi[i].pa);
+ cq->sq.r.sq_bi[i].va = NULL;
+ cq->sq.r.sq_bi[i].pa = 0;
+ cq->sq.r.sq_bi[i].size = 0;
+ }
+ cq->sq.r.sq_bi = NULL;
+ devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
+ cq->sq.dma_head = NULL;
+
+ return -ENOMEM;
+}
+
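+/**
+ * ice_cfg_cq_regs - configure control queue registers
+ * @hw: pointer to the hardware structure
+ * @ring: pointer to the specific control queue ring
+ * @num_entries: number of descriptors in the ring
+ *
+ * Program the head/tail, length, and base address registers for a control
+ * queue ring, then read back the base address low register to verify that
+ * the configuration was applied.
+ */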
+static int
+ice_cfg_cq_regs(struct ice_hw *hw, struct ice_ctl_q_ring *ring, u16 num_entries)
+{
+ /* Clear Head and Tail */
+ wr32(hw, ring->head, 0);
+ wr32(hw, ring->tail, 0);
+
+ /* set starting point */
+ wr32(hw, ring->len, (num_entries | ring->len_ena_mask));
+ wr32(hw, ring->bal, lower_32_bits(ring->desc_buf.pa));
+ wr32(hw, ring->bah, upper_32_bits(ring->desc_buf.pa));
+
+ /* Check one register to verify that config was applied */
+ if (rd32(hw, ring->bal) != lower_32_bits(ring->desc_buf.pa))
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * ice_cfg_sq_regs - configure Control ATQ registers
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Configure base address and length registers for the transmit queue
+ */
+static int ice_cfg_sq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ return ice_cfg_cq_regs(hw, &cq->sq, cq->num_sq_entries);
+}
+
+/**
+ * ice_cfg_rq_regs - configure Control ARQ registers
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Configure base address and length registers for the receive (event) queue
+ */
+static int ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int status;
+
+ status = ice_cfg_cq_regs(hw, &cq->rq, cq->num_rq_entries);
+ if (status)
+ return status;
+
+ /* Update tail in the HW to post pre-allocated buffers */
+ wr32(hw, cq->rq.tail, (u32)(cq->num_rq_entries - 1));
+
+ return 0;
+}
+
+#define ICE_FREE_CQ_BUFS(hw, qi, ring) \
+do { \
+ /* free descriptors */ \
+ if ((qi)->ring.r.ring##_bi) { \
+ int i; \
+ \
+ for (i = 0; i < (qi)->num_##ring##_entries; i++) \
+ if ((qi)->ring.r.ring##_bi[i].pa) { \
+ dmam_free_coherent(ice_hw_to_dev(hw), \
+ (qi)->ring.r.ring##_bi[i].size, \
+ (qi)->ring.r.ring##_bi[i].va, \
+ (qi)->ring.r.ring##_bi[i].pa); \
+ (qi)->ring.r.ring##_bi[i].va = NULL;\
+ (qi)->ring.r.ring##_bi[i].pa = 0;\
+ (qi)->ring.r.ring##_bi[i].size = 0;\
+ } \
+ } \
+ /* free the buffer info list */ \
+ if ((qi)->ring.cmd_buf) \
+ devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \
+ /* free DMA head */ \
+ devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head); \
+} while (0)
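+
+/* Illustrative expansion: ICE_FREE_CQ_BUFS(hw, cq, sq) walks
+ * cq->sq.r.sq_bi[0..num_sq_entries), freeing each mapped buffer, then
+ * frees cq->sq.cmd_buf and cq->sq.dma_head. The token pasting is why the
+ * ring argument must be spelled literally as sq or rq.
+ */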
+
+/**
+ * ice_init_sq - main initialization routine for Control ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * This is the main initialization routine for the Control Send Queue
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure:
+ * - cq->num_sq_entries
+ * - cq->sq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+static int ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int ret_code;
+
+ if (cq->sq.count > 0) {
+ /* queue already initialized */
+ ret_code = -EBUSY;
+ goto init_ctrlq_exit;
+ }
+
+ /* verify input for valid configuration */
+ if (!cq->num_sq_entries || !cq->sq_buf_size) {
+ ret_code = -EIO;
+ goto init_ctrlq_exit;
+ }
+
+ cq->sq.next_to_use = 0;
+ cq->sq.next_to_clean = 0;
+
+ /* allocate the ring memory */
+ ret_code = ice_alloc_ctrlq_sq_ring(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_exit;
+
+ /* allocate buffers in the rings */
+ ret_code = ice_alloc_sq_bufs(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_free_rings;
+
+ /* initialize base registers */
+ ret_code = ice_cfg_sq_regs(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_free_rings;
+
+ /* success! */
+ cq->sq.count = cq->num_sq_entries;
+ goto init_ctrlq_exit;
+
+init_ctrlq_free_rings:
+ ICE_FREE_CQ_BUFS(hw, cq, sq);
+ ice_free_cq_ring(hw, &cq->sq);
+
+init_ctrlq_exit:
+ return ret_code;
+}
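+
+/* Usage sketch (illustrative; the real sizing is done elsewhere in the
+ * driver): before ice_init_sq()/ice_init_rq() run via ice_init_ctrlq(),
+ * the caller must have sized the queue, e.g. for the AdminQ (ICE_AQ_LEN
+ * comes from ice.h):
+ *
+ *	hw->adminq.num_sq_entries = ICE_AQ_LEN;
+ *	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
+ */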
+
+/**
+ * ice_init_rq - initialize ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main initialization routine for the Admin Receive (Event) Queue.
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure:
+ * - cq->num_rq_entries
+ * - cq->rq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+static int ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int ret_code;
+
+ if (cq->rq.count > 0) {
+ /* queue already initialized */
+ ret_code = -EBUSY;
+ goto init_ctrlq_exit;
+ }
+
+ /* verify input for valid configuration */
+ if (!cq->num_rq_entries || !cq->rq_buf_size) {
+ ret_code = -EIO;
+ goto init_ctrlq_exit;
+ }
+
+ cq->rq.next_to_use = 0;
+ cq->rq.next_to_clean = 0;
+
+ /* allocate the ring memory */
+ ret_code = ice_alloc_ctrlq_rq_ring(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_exit;
+
+ /* allocate buffers in the rings */
+ ret_code = ice_alloc_rq_bufs(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_free_rings;
+
+ /* initialize base registers */
+ ret_code = ice_cfg_rq_regs(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_free_rings;
+
+ /* success! */
+ cq->rq.count = cq->num_rq_entries;
+ goto init_ctrlq_exit;
+
+init_ctrlq_free_rings:
+ ICE_FREE_CQ_BUFS(hw, cq, rq);
+ ice_free_cq_ring(hw, &cq->rq);
+
+init_ctrlq_exit:
+ return ret_code;
+}
+
+/**
+ * ice_shutdown_sq - shutdown the Control ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for the Control Transmit Queue
+ */
+static int ice_shutdown_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int ret_code = 0;
+
+ mutex_lock(&cq->sq_lock);
+
+ if (!cq->sq.count) {
+ ret_code = -EBUSY;
+ goto shutdown_sq_out;
+ }
+
+ /* Stop firmware AdminQ processing */
+ wr32(hw, cq->sq.head, 0);
+ wr32(hw, cq->sq.tail, 0);
+ wr32(hw, cq->sq.len, 0);
+ wr32(hw, cq->sq.bal, 0);
+ wr32(hw, cq->sq.bah, 0);
+
+ cq->sq.count = 0; /* to indicate uninitialized queue */
+
+ /* free ring buffers and the ring itself */
+ ICE_FREE_CQ_BUFS(hw, cq, sq);
+ ice_free_cq_ring(hw, &cq->sq);
+
+shutdown_sq_out:
+ mutex_unlock(&cq->sq_lock);
+ return ret_code;
+}
+
+/**
+ * ice_aq_ver_check - Check the reported AQ API version.
+ * @hw: pointer to the hardware structure
+ *
+ * Checks if the driver should load on a given AQ API version.
+ *
+ * Return: 'true' if the driver should attempt to load, 'false' otherwise.
+ */
+static bool ice_aq_ver_check(struct ice_hw *hw)
+{
+ if (hw->api_maj_ver > EXP_FW_API_VER_MAJOR) {
+ /* Major API version is newer than expected, don't load */
+ dev_warn(ice_hw_to_dev(hw),
+ "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
+ return false;
+ } else if (hw->api_maj_ver == EXP_FW_API_VER_MAJOR) {
+ if (hw->api_min_ver > (EXP_FW_API_VER_MINOR + 2))
+ dev_info(ice_hw_to_dev(hw),
+ "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
+ else if ((hw->api_min_ver + 2) < EXP_FW_API_VER_MINOR)
+ dev_info(ice_hw_to_dev(hw),
+ "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
+ } else {
+ /* Major API version is older than expected, log a warning */
+ dev_info(ice_hw_to_dev(hw),
+ "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
+ }
+ return true;
+}
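+
+/* Worked example (illustrative): with an expected API version of 1.5,
+ * firmware reporting 2.0 fails the check and the driver refuses to load;
+ * 1.8 loads but logs the "newer than expected" message (8 > 5 + 2); 1.2
+ * loads but logs the "older than expected" message (2 + 2 < 5); 1.5
+ * loads silently.
+ */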
+
+/**
+ * ice_shutdown_rq - shutdown Control ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for the Control Receive Queue
+ */
+static int ice_shutdown_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ int ret_code = 0;
+
+ mutex_lock(&cq->rq_lock);
+
+ if (!cq->rq.count) {
+ ret_code = -EBUSY;
+ goto shutdown_rq_out;
+ }
+
+ /* Stop Control Queue processing */
+ wr32(hw, cq->rq.head, 0);
+ wr32(hw, cq->rq.tail, 0);
+ wr32(hw, cq->rq.len, 0);
+ wr32(hw, cq->rq.bal, 0);
+ wr32(hw, cq->rq.bah, 0);
+
+ /* set rq.count to 0 to indicate uninitialized queue */
+ cq->rq.count = 0;
+
+ /* free ring buffers and the ring itself */
+ ICE_FREE_CQ_BUFS(hw, cq, rq);
+ ice_free_cq_ring(hw, &cq->rq);
+
+shutdown_rq_out:
+ mutex_unlock(&cq->rq_lock);
+ return ret_code;
+}
+
+/**
+ * ice_init_check_adminq - Check version for Admin Queue to know if it's alive
+ * @hw: pointer to the hardware structure
+ */
+static int ice_init_check_adminq(struct ice_hw *hw)
+{
+ struct ice_ctl_q_info *cq = &hw->adminq;
+ int status;
+
+ status = ice_aq_get_fw_ver(hw, NULL);
+ if (status)
+ goto init_ctrlq_free_rq;
+
+ if (!ice_aq_ver_check(hw)) {
+ status = -EIO;
+ goto init_ctrlq_free_rq;
+ }
+
+ return 0;
+
+init_ctrlq_free_rq:
+ ice_shutdown_rq(hw, cq);
+ ice_shutdown_sq(hw, cq);
+ return status;
+}
+
+/**
+ * ice_init_ctrlq - main initialization routine for any control Queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure:
+ * - cq->num_sq_entries
+ * - cq->num_rq_entries
+ * - cq->rq_buf_size
+ * - cq->sq_buf_size
+ *
+ * NOTE: this function does not initialize the controlq locks
+ */
+static int ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+ struct ice_ctl_q_info *cq;
+ int ret_code;
+
+ switch (q_type) {
+ case ICE_CTL_Q_ADMIN:
+ ice_adminq_init_regs(hw);
+ cq = &hw->adminq;
+ break;
+ case ICE_CTL_Q_SB:
+ ice_sb_init_regs(hw);
+ cq = &hw->sbq;
+ break;
+ case ICE_CTL_Q_MAILBOX:
+ ice_mailbox_init_regs(hw);
+ cq = &hw->mailboxq;
+ break;
+ default:
+ return -EINVAL;
+ }
+ cq->qtype = q_type;
+
+ /* verify input for valid configuration */
+ if (!cq->num_rq_entries || !cq->num_sq_entries ||
+ !cq->rq_buf_size || !cq->sq_buf_size) {
+ return -EIO;
+ }
+
+ /* setup SQ command write back timeout */
+ cq->sq_cmd_timeout = ICE_CTL_Q_SQ_CMD_TIMEOUT;
+
+ /* allocate the ATQ */
+ ret_code = ice_init_sq(hw, cq);
+ if (ret_code)
+ return ret_code;
+
+ /* allocate the ARQ */
+ ret_code = ice_init_rq(hw, cq);
+ if (ret_code)
+ goto init_ctrlq_free_sq;
+
+ /* success! */
+ return 0;
+
+init_ctrlq_free_sq:
+ ice_shutdown_sq(hw, cq);
+ return ret_code;
+}
+
+/**
+ * ice_is_sbq_supported - is the sideband queue supported
+ * @hw: pointer to the hardware structure
+ *
+ * Returns true if the sideband control queue interface is
+ * supported for the device, false otherwise
+ */
+bool ice_is_sbq_supported(struct ice_hw *hw)
+{
+ /* The device sideband queue is only supported on devices with the
+ * generic MAC type.
+ */
+ return hw->mac_type == ICE_MAC_GENERIC;
+}
+
+/**
+ * ice_get_sbq - returns the right control queue to use for sideband
+ * @hw: pointer to the hardware structure
+ */
+struct ice_ctl_q_info *ice_get_sbq(struct ice_hw *hw)
+{
+ if (ice_is_sbq_supported(hw))
+ return &hw->sbq;
+ return &hw->adminq;
+}
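+
+/* Usage sketch (illustrative): sideband callers do not pick a queue
+ * directly; they send on whatever this helper returns, so devices without
+ * a dedicated sideband queue transparently fall back to the AdminQ:
+ *
+ *	status = ice_sq_send_cmd(hw, ice_get_sbq(hw), &desc, buf,
+ *				 buf_size, NULL);
+ */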
+
+/**
+ * ice_shutdown_ctrlq - shutdown routine for any control queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * NOTE: this function does not destroy the control queue locks.
+ */
+static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+ struct ice_ctl_q_info *cq;
+
+ switch (q_type) {
+ case ICE_CTL_Q_ADMIN:
+ cq = &hw->adminq;
+ if (ice_check_sq_alive(hw, cq))
+ ice_aq_q_shutdown(hw, true);
+ break;
+ case ICE_CTL_Q_SB:
+ cq = &hw->sbq;
+ break;
+ case ICE_CTL_Q_MAILBOX:
+ cq = &hw->mailboxq;
+ break;
+ default:
+ return;
+ }
+
+ ice_shutdown_sq(hw, cq);
+ ice_shutdown_rq(hw, cq);
+}
+
+/**
+ * ice_shutdown_all_ctrlq - shutdown routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * NOTE: this function does not destroy the control queue locks. The driver
+ * may call this at runtime to shutdown and later restart control queues, such
+ * as in response to a reset event.
+ */
+void ice_shutdown_all_ctrlq(struct ice_hw *hw)
+{
+ /* Shutdown FW admin queue */
+ ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+ /* Shutdown PHY Sideband */
+ if (ice_is_sbq_supported(hw))
+ ice_shutdown_ctrlq(hw, ICE_CTL_Q_SB);
+ /* Shutdown PF-VF Mailbox */
+ ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
+/**
+ * ice_init_all_ctrlq - main initialization routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure for all control queues:
+ * - cq->num_sq_entries
+ * - cq->num_rq_entries
+ * - cq->rq_buf_size
+ * - cq->sq_buf_size
+ *
+ * NOTE: this function does not initialize the controlq locks.
+ */
+int ice_init_all_ctrlq(struct ice_hw *hw)
+{
+ u32 retry = 0;
+ int status;
+
+ /* Init FW admin queue */
+ do {
+ status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
+ if (status)
+ return status;
+
+ status = ice_init_check_adminq(hw);
+ if (status != -EIO)
+ break;
+
+ ice_debug(hw, ICE_DBG_AQ_MSG, "Retry Admin Queue init due to FW critical error\n");
+ ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+ msleep(ICE_CTL_Q_ADMIN_INIT_MSEC);
+ } while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT);
+
+ if (status)
+ return status;
+ /* sideband control queue (SBQ) interface is not supported on some
+ * devices. Initialize if supported, else fallback to the admin queue
+ * interface
+ */
+ if (ice_is_sbq_supported(hw)) {
+ status = ice_init_ctrlq(hw, ICE_CTL_Q_SB);
+ if (status)
+ return status;
+ }
+ /* Init Mailbox queue */
+ return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
+/**
+ * ice_init_ctrlq_locks - Initialize locks for a control queue
+ * @cq: pointer to the control queue
+ *
+ * Initializes the send and receive queue locks for a given control queue.
+ */
+static void ice_init_ctrlq_locks(struct ice_ctl_q_info *cq)
+{
+ mutex_init(&cq->sq_lock);
+ mutex_init(&cq->rq_lock);
+}
+
+/**
+ * ice_create_all_ctrlq - main initialization routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure for all control queues:
+ * - cq->num_sq_entries
+ * - cq->num_rq_entries
+ * - cq->rq_buf_size
+ * - cq->sq_buf_size
+ *
+ * This function creates all the control queue locks and then calls
+ * ice_init_all_ctrlq. It should be called once during driver load. If the
+ * driver needs to re-initialize control queues at run time it should call
+ * ice_init_all_ctrlq instead.
+ */
+int ice_create_all_ctrlq(struct ice_hw *hw)
+{
+ ice_init_ctrlq_locks(&hw->adminq);
+ if (ice_is_sbq_supported(hw))
+ ice_init_ctrlq_locks(&hw->sbq);
+ ice_init_ctrlq_locks(&hw->mailboxq);
+
+ return ice_init_all_ctrlq(hw);
+}
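+
+/* Lifecycle note (summary, not a call site): ice_create_all_ctrlq() runs
+ * once at probe and ice_destroy_all_ctrlq() once at remove; across a
+ * reset the driver instead pairs ice_shutdown_all_ctrlq() with
+ * ice_init_all_ctrlq() so the mutexes created here stay intact.
+ */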
+
+/**
+ * ice_destroy_ctrlq_locks - Destroy locks for a control queue
+ * @cq: pointer to the control queue
+ *
+ * Destroys the send and receive queue locks for a given control queue.
+ */
+static void ice_destroy_ctrlq_locks(struct ice_ctl_q_info *cq)
+{
+ mutex_destroy(&cq->sq_lock);
+ mutex_destroy(&cq->rq_lock);
+}
+
+/**
+ * ice_destroy_all_ctrlq - exit routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * This function shuts down all the control queues and then destroys the
+ * control queue locks. It should be called once during driver unload. The
+ * driver should call ice_shutdown_all_ctrlq if it needs to shut down and
+ * reinitialize control queues, such as in response to a reset event.
+ */
+void ice_destroy_all_ctrlq(struct ice_hw *hw)
+{
+ /* shut down all the control queues first */
+ ice_shutdown_all_ctrlq(hw);
+
+ ice_destroy_ctrlq_locks(&hw->adminq);
+ if (ice_is_sbq_supported(hw))
+ ice_destroy_ctrlq_locks(&hw->sbq);
+ ice_destroy_ctrlq_locks(&hw->mailboxq);
+}
+
+/**
+ * ice_clean_sq - cleans Admin send queue (ATQ)
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * returns the number of free desc
+ */
+static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ struct ice_ctl_q_ring *sq = &cq->sq;
+ u16 ntc = sq->next_to_clean;
+ struct ice_sq_cd *details;
+ struct ice_aq_desc *desc;
+
+ desc = ICE_CTL_Q_DESC(*sq, ntc);
+ details = ICE_CTL_Q_DETAILS(*sq, ntc);
+
+ while (rd32(hw, cq->sq.head) != ntc) {
+ ice_debug(hw, ICE_DBG_AQ_MSG, "ntc %d head %d.\n", ntc, rd32(hw, cq->sq.head));
+ memset(desc, 0, sizeof(*desc));
+ memset(details, 0, sizeof(*details));
+ ntc++;
+ if (ntc == sq->count)
+ ntc = 0;
+ desc = ICE_CTL_Q_DESC(*sq, ntc);
+ details = ICE_CTL_Q_DETAILS(*sq, ntc);
+ }
+
+ sq->next_to_clean = ntc;
+
+ return ICE_CTL_Q_DESC_UNUSED(sq);
+}
+
+/**
+ * ice_debug_cq
+ * @hw: pointer to the hardware structure
+ * @desc: pointer to control queue descriptor
+ * @buf: pointer to command buffer
+ * @buf_len: max length of buf
+ *
+ * Dumps debug log about control command with descriptor contents.
+ */
+static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len)
+{
+ struct ice_aq_desc *cq_desc = desc;
+ u16 len;
+
+ if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) &&
+ !((ICE_DBG_AQ_DESC | ICE_DBG_AQ_DESC_BUF) & hw->debug_mask))
+ return;
+
+ if (!desc)
+ return;
+
+ len = le16_to_cpu(cq_desc->datalen);
+
+ ice_debug(hw, ICE_DBG_AQ_DESC, "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+ le16_to_cpu(cq_desc->opcode),
+ le16_to_cpu(cq_desc->flags),
+ le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
+ ice_debug(hw, ICE_DBG_AQ_DESC, "\tcookie (h,l) 0x%08X 0x%08X\n",
+ le32_to_cpu(cq_desc->cookie_high),
+ le32_to_cpu(cq_desc->cookie_low));
+ ice_debug(hw, ICE_DBG_AQ_DESC, "\tparam (0,1) 0x%08X 0x%08X\n",
+ le32_to_cpu(cq_desc->params.generic.param0),
+ le32_to_cpu(cq_desc->params.generic.param1));
+ ice_debug(hw, ICE_DBG_AQ_DESC, "\taddr (h,l) 0x%08X 0x%08X\n",
+ le32_to_cpu(cq_desc->params.generic.addr_high),
+ le32_to_cpu(cq_desc->params.generic.addr_low));
+ if (buf && cq_desc->datalen != 0) {
+ ice_debug(hw, ICE_DBG_AQ_DESC_BUF, "Buffer:\n");
+ if (buf_len < len)
+ len = buf_len;
+
+ ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, buf, len);
+ }
+}
+
+/**
+ * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns true if the firmware has processed all descriptors on the
+ * admin send queue. Returns false if there are still requests pending.
+ */
+static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ /* AQ designers suggest use of head for better
+ * timing reliability than DD bit
+ */
+ return rd32(hw, cq->sq.head) == cq->sq.next_to_use;
+}
+
+/**
+ * ice_sq_send_cmd - send command to Control Queue (ATQ)
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ * @desc: prefilled descriptor describing the command
+ * @buf: buffer to use for indirect commands (or NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * This is the main send command routine for the ATQ. It posts the descriptor
+ * (and any indirect buffer) to the ring, waits for the firmware to process
+ * it, and reclaims completed descriptors.
+ */
+int
+ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+ struct ice_aq_desc *desc, void *buf, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_dma_mem *dma_buf = NULL;
+ struct ice_aq_desc *desc_on_ring;
+ bool cmd_completed = false;
+ struct ice_sq_cd *details;
+ u32 total_delay = 0;
+ int status = 0;
+ u16 retval = 0;
+ u32 val = 0;
+
+ /* if reset is in progress return a soft error */
+ if (hw->reset_ongoing)
+ return -EBUSY;
+ mutex_lock(&cq->sq_lock);
+
+ cq->sq_last_status = ICE_AQ_RC_OK;
+
+ if (!cq->sq.count) {
+ ice_debug(hw, ICE_DBG_AQ_MSG, "Control Send queue not initialized.\n");
+ status = -EIO;
+ goto sq_send_command_error;
+ }
+
+ if ((buf && !buf_size) || (!buf && buf_size)) {
+ status = -EINVAL;
+ goto sq_send_command_error;
+ }
+
+ if (buf) {
+ if (buf_size > cq->sq_buf_size) {
+ ice_debug(hw, ICE_DBG_AQ_MSG, "Invalid buffer size for Control Send queue: %d.\n",
+ buf_size);
+ status = -EINVAL;
+ goto sq_send_command_error;
+ }
+
+ desc->flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
+ if (buf_size > ICE_AQ_LG_BUF)
+ desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+ }
+
+ val = rd32(hw, cq->sq.head);
+ if (val >= cq->num_sq_entries) {
+ ice_debug(hw, ICE_DBG_AQ_MSG, "head overrun at %d in the Control Send Queue ring\n",
+ val);
+ status = -EIO;
+ goto sq_send_command_error;
+ }
+
+ details = ICE_CTL_Q_DETAILS(cq->sq, cq->sq.next_to_use);
+ if (cd)
+ *details = *cd;
+ else
+ memset(details, 0, sizeof(*details));
+
+	/* Clean the queue to reclaim descriptors already processed by FW/MBX;
+	 * ice_clean_sq() returns the number of descriptors still available.
+	 * The clean function could also be called from a separate thread in
+	 * case of asynchronous completions.
+	 */
+ if (ice_clean_sq(hw, cq) == 0) {
+ ice_debug(hw, ICE_DBG_AQ_MSG, "Error: Control Send Queue is full.\n");
+ status = -ENOSPC;
+ goto sq_send_command_error;
+ }
+
+ /* initialize the temp desc pointer with the right desc */
+ desc_on_ring = ICE_CTL_Q_DESC(cq->sq, cq->sq.next_to_use);
+
+ /* if the desc is available copy the temp desc to the right place */
+ memcpy(desc_on_ring, desc, sizeof(*desc_on_ring));
+
+ /* if buf is not NULL assume indirect command */
+ if (buf) {
+ dma_buf = &cq->sq.r.sq_bi[cq->sq.next_to_use];
+ /* copy the user buf into the respective DMA buf */
+ memcpy(dma_buf->va, buf, buf_size);
+ desc_on_ring->datalen = cpu_to_le16(buf_size);
+
+ /* Update the address values in the desc with the pa value
+ * for respective buffer
+ */
+ desc_on_ring->params.generic.addr_high =
+ cpu_to_le32(upper_32_bits(dma_buf->pa));
+ desc_on_ring->params.generic.addr_low =
+ cpu_to_le32(lower_32_bits(dma_buf->pa));
+ }
+
+ /* Debug desc and buffer */
+ ice_debug(hw, ICE_DBG_AQ_DESC, "ATQ: Control Send queue desc and buffer:\n");
+
+ ice_debug_cq(hw, (void *)desc_on_ring, buf, buf_size);
+
+ (cq->sq.next_to_use)++;
+ if (cq->sq.next_to_use == cq->sq.count)
+ cq->sq.next_to_use = 0;
+ wr32(hw, cq->sq.tail, cq->sq.next_to_use);
+
+ do {
+ if (ice_sq_done(hw, cq))
+ break;
+
+ udelay(ICE_CTL_Q_SQ_CMD_USEC);
+ total_delay++;
+ } while (total_delay < cq->sq_cmd_timeout);
+
+ /* if ready, copy the desc back to temp */
+ if (ice_sq_done(hw, cq)) {
+ memcpy(desc, desc_on_ring, sizeof(*desc));
+ if (buf) {
+ /* get returned length to copy */
+ u16 copy_size = le16_to_cpu(desc->datalen);
+
+ if (copy_size > buf_size) {
+ ice_debug(hw, ICE_DBG_AQ_MSG, "Return len %d > than buf len %d\n",
+ copy_size, buf_size);
+ status = -EIO;
+ } else {
+ memcpy(buf, dma_buf->va, copy_size);
+ }
+ }
+ retval = le16_to_cpu(desc->retval);
+ if (retval) {
+ ice_debug(hw, ICE_DBG_AQ_MSG, "Control Send Queue command 0x%04X completed with error 0x%X\n",
+ le16_to_cpu(desc->opcode),
+ retval);
+
+ /* strip off FW internal code */
+ retval &= 0xff;
+ }
+ cmd_completed = true;
+ if (!status && retval != ICE_AQ_RC_OK)
+ status = -EIO;
+ cq->sq_last_status = (enum ice_aq_err)retval;
+ }
+
+ ice_debug(hw, ICE_DBG_AQ_MSG, "ATQ: desc and buffer writeback:\n");
+
+ ice_debug_cq(hw, (void *)desc, buf, buf_size);
+
+ /* save writeback AQ if requested */
+ if (details->wb_desc)
+ memcpy(details->wb_desc, desc_on_ring,
+ sizeof(*details->wb_desc));
+
+ /* update the error if time out occurred */
+ if (!cmd_completed) {
+ if (rd32(hw, cq->rq.len) & cq->rq.len_crit_mask ||
+ rd32(hw, cq->sq.len) & cq->sq.len_crit_mask) {
+ ice_debug(hw, ICE_DBG_AQ_MSG, "Critical FW error.\n");
+ status = -EIO;
+ } else {
+ ice_debug(hw, ICE_DBG_AQ_MSG, "Control Send Queue Writeback timeout.\n");
+ status = -EIO;
+ }
+ }
+
+sq_send_command_error:
+ mutex_unlock(&cq->sq_lock);
+ return status;
+}
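+
+/* Usage sketch (illustrative, condensed from typical wrappers such as
+ * ice_aq_send_cmd()): a direct (buffer-less) command fills a descriptor
+ * and sends it on the AdminQ:
+ *
+ *	struct ice_aq_desc desc;
+ *	int status;
+ *
+ *	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown);
+ *	status = ice_sq_send_cmd(hw, &hw->adminq, &desc, NULL, 0, NULL);
+ */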
+
+/**
+ * ice_fill_dflt_direct_cmd_desc - AQ descriptor helper function
+ * @desc: pointer to the temp descriptor (non DMA mem)
+ * @opcode: the opcode can be used to decide which flags to turn off or on
+ *
+ * Fill the desc with default values
+ */
+void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode)
+{
+ /* zero out the desc */
+ memset(desc, 0, sizeof(*desc));
+ desc->opcode = cpu_to_le16(opcode);
+ desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI);
+}
+
+/**
+ * ice_clean_rq_elem
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ * @e: event info from the receive descriptor, includes any buffers
+ * @pending: number of events that could be left to process
+ *
+ * This function cleans one Admin Receive Queue element and returns
+ * the contents through e. It can also return how many events are
+ * left to process through 'pending'.
+ */
+int
+ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+ struct ice_rq_event_info *e, u16 *pending)
+{
+ u16 ntc = cq->rq.next_to_clean;
+ enum ice_aq_err rq_last_status;
+ struct ice_aq_desc *desc;
+ struct ice_dma_mem *bi;
+ int ret_code = 0;
+ u16 desc_idx;
+ u16 datalen;
+ u16 flags;
+ u16 ntu;
+
+ /* pre-clean the event info */
+ memset(&e->desc, 0, sizeof(e->desc));
+
+ /* take the lock before we start messing with the ring */
+ mutex_lock(&cq->rq_lock);
+
+ if (!cq->rq.count) {
+ ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive queue not initialized.\n");
+ ret_code = -EIO;
+ goto clean_rq_elem_err;
+ }
+
+ /* set next_to_use to head */
+ ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+
+ if (ntu == ntc) {
+ /* nothing to do - shouldn't need to update ring's values */
+ ret_code = -EALREADY;
+ goto clean_rq_elem_out;
+ }
+
+ /* now clean the next descriptor */
+ desc = ICE_CTL_Q_DESC(cq->rq, ntc);
+ desc_idx = ntc;
+
+ rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
+ flags = le16_to_cpu(desc->flags);
+ if (flags & ICE_AQ_FLAG_ERR) {
+ ret_code = -EIO;
+ ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive Queue Event 0x%04X received with error 0x%X\n",
+ le16_to_cpu(desc->opcode), rq_last_status);
+ }
+ memcpy(&e->desc, desc, sizeof(e->desc));
+ datalen = le16_to_cpu(desc->datalen);
+ e->msg_len = min_t(u16, datalen, e->buf_len);
+ if (e->msg_buf && e->msg_len)
+ memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len);
+
+ ice_debug(hw, ICE_DBG_AQ_DESC, "ARQ: desc and buffer:\n");
+
+ ice_debug_cq(hw, (void *)desc, e->msg_buf, cq->rq_buf_size);
+
+	/* Restore the original datalen and buffer address in the desc;
+	 * FW updates datalen to indicate the event message size
+	 */
+ bi = &cq->rq.r.rq_bi[ntc];
+ memset(desc, 0, sizeof(*desc));
+
+ desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
+ if (cq->rq_buf_size > ICE_AQ_LG_BUF)
+ desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+ desc->datalen = cpu_to_le16(bi->size);
+ desc->params.generic.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
+ desc->params.generic.addr_low = cpu_to_le32(lower_32_bits(bi->pa));
+
+ /* set tail = the last cleaned desc index. */
+ wr32(hw, cq->rq.tail, ntc);
+ /* ntc is updated to tail + 1 */
+ ntc++;
+ if (ntc == cq->num_rq_entries)
+ ntc = 0;
+ cq->rq.next_to_clean = ntc;
+ cq->rq.next_to_use = ntu;
+
+clean_rq_elem_out:
+ /* Set pending if needed, unlock and return */
+ if (pending) {
+ /* re-read HW head to calculate actual pending messages */
+ ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+ *pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc));
+ }
+clean_rq_elem_err:
+ mutex_unlock(&cq->rq_lock);
+
+ return ret_code;
+}
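+
+/* Usage sketch (illustrative, modeled on the driver's control queue
+ * service task): an event loop drains the ARQ until nothing is pending:
+ *
+ *	struct ice_rq_event_info event = {};
+ *	u16 pending;
+ *
+ *	event.buf_len = cq->rq_buf_size;
+ *	event.msg_buf = buf;
+ *	do {
+ *		if (ice_clean_rq_elem(hw, cq, &event, &pending))
+ *			break;
+ *		... process event.desc and event.msg_buf ...
+ *	} while (pending);
+ */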
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
new file mode 100644
index 000000000..c07e9cc9f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_CONTROLQ_H_
+#define _ICE_CONTROLQ_H_
+
+#include "ice_adminq_cmd.h"
+
+/* Maximum buffer lengths for all control queue types */
+#define ICE_AQ_MAX_BUF_LEN 4096
+#define ICE_MBXQ_MAX_BUF_LEN 4096
+#define ICE_SBQ_MAX_BUF_LEN 512
+
+#define ICE_CTL_Q_DESC(R, i) \
+ (&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
+
+#define ICE_CTL_Q_DESC_UNUSED(R) \
+ ((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+ (R)->next_to_clean - (R)->next_to_use - 1))
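+
+/* Worked example (illustrative): for a 64-entry ring with next_to_clean
+ * of 10 and next_to_use of 12, the macro yields 64 + 10 - 12 - 1 = 61
+ * unused descriptors; one slot is always held back so that a full ring
+ * can be distinguished from an empty one.
+ */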
+
+/* Defines that help manage the driver vs FW API checks.
+ * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
+ */
+#define EXP_FW_API_VER_BRANCH 0x00
+#define EXP_FW_API_VER_MAJOR 0x01
+#define EXP_FW_API_VER_MINOR 0x05
+
+/* Different control queue types: These are mainly for SW consumption. */
+enum ice_ctl_q {
+ ICE_CTL_Q_UNKNOWN = 0,
+ ICE_CTL_Q_ADMIN,
+ ICE_CTL_Q_MAILBOX,
+ ICE_CTL_Q_SB,
+};
+
+/* Control Queue timeout settings - max delay 1s */
+#define ICE_CTL_Q_SQ_CMD_TIMEOUT 10000 /* Count 10000 times */
+#define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */
+#define ICE_CTL_Q_ADMIN_INIT_TIMEOUT 10 /* Count 10 times */
+#define ICE_CTL_Q_ADMIN_INIT_MSEC 100 /* Check every 100msec */
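+
+/* Worked out: the send side polls every ICE_CTL_Q_SQ_CMD_USEC for up to
+ * ICE_CTL_Q_SQ_CMD_TIMEOUT iterations, i.e. 10000 * 100usec = 1s; AdminQ
+ * init likewise retries up to 10 * 100msec = 1s.
+ */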
+
+struct ice_ctl_q_ring {
+ void *dma_head; /* Virtual address to DMA head */
+ struct ice_dma_mem desc_buf; /* descriptor ring memory */
+ void *cmd_buf; /* command buffer memory */
+
+ union {
+ struct ice_dma_mem *sq_bi;
+ struct ice_dma_mem *rq_bi;
+ } r;
+
+ u16 count; /* Number of descriptors */
+
+ /* used for interrupt processing */
+ u16 next_to_use;
+ u16 next_to_clean;
+
+ /* used for queue tracking */
+ u32 head;
+ u32 tail;
+ u32 len;
+ u32 bah;
+ u32 bal;
+ u32 len_mask;
+ u32 len_ena_mask;
+ u32 len_crit_mask;
+ u32 head_mask;
+};
+
+/* sq transaction details */
+struct ice_sq_cd {
+ struct ice_aq_desc *wb_desc;
+};
+
+#define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i]))
+
+/* rq event information */
+struct ice_rq_event_info {
+ struct ice_aq_desc desc;
+ u16 msg_len;
+ u16 buf_len;
+ u8 *msg_buf;
+};
+
+/* Control Queue information */
+struct ice_ctl_q_info {
+ enum ice_ctl_q qtype;
+ struct ice_ctl_q_ring rq; /* receive queue */
+ struct ice_ctl_q_ring sq; /* send queue */
+ u32 sq_cmd_timeout; /* send queue cmd write back timeout */
+ u16 num_rq_entries; /* receive queue depth */
+ u16 num_sq_entries; /* send queue depth */
+ u16 rq_buf_size; /* receive queue buffer size */
+ u16 sq_buf_size; /* send queue buffer size */
+ enum ice_aq_err sq_last_status; /* last status on send queue */
+ struct mutex sq_lock; /* Send queue lock */
+ struct mutex rq_lock; /* Receive queue lock */
+};
+
+#endif /* _ICE_CONTROLQ_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
new file mode 100644
index 000000000..6375372f8
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -0,0 +1,1617 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_sched.h"
+#include "ice_dcb.h"
+
+/**
+ * ice_aq_get_lldp_mib
+ * @hw: pointer to the HW struct
+ * @bridge_type: type of bridge requested
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buf: pointer to the caller-supplied buffer to store the MIB block
+ * @buf_size: size of the buffer (in bytes)
+ * @local_len: length of the returned Local LLDP MIB
+ * @remote_len: length of the returned Remote LLDP MIB
+ * @cd: pointer to command details structure or NULL
+ *
+ * Requests the complete LLDP MIB (entire packet). (0x0A00)
+ */
+static int
+ice_aq_get_lldp_mib(struct ice_hw *hw, u8 bridge_type, u8 mib_type, void *buf,
+ u16 buf_size, u16 *local_len, u16 *remote_len,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_lldp_get_mib *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ cmd = &desc.params.lldp_get_mib;
+
+ if (buf_size == 0 || !buf)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_get_mib);
+
+ cmd->type = mib_type & ICE_AQ_LLDP_MIB_TYPE_M;
+ cmd->type |= (bridge_type << ICE_AQ_LLDP_BRID_TYPE_S) &
+ ICE_AQ_LLDP_BRID_TYPE_M;
+
+ desc.datalen = cpu_to_le16(buf_size);
+
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status) {
+ if (local_len)
+ *local_len = le16_to_cpu(cmd->local_len);
+ if (remote_len)
+ *remote_len = le16_to_cpu(cmd->remote_len);
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_cfg_lldp_mib_change
+ * @hw: pointer to the HW struct
+ * @ena_update: Enable or Disable event posting
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable or Disable posting of an event on ARQ when LLDP MIB
+ * associated with the interface changes (0x0A01)
+ */
+static int
+ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_lldp_set_mib_change *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.lldp_set_event;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_set_mib_change);
+
+ if (!ena_update)
+ cmd->command |= ICE_AQ_LLDP_MIB_UPDATE_DIS;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_stop_lldp
+ * @hw: pointer to the HW struct
+ * @shutdown_lldp_agent: True if LLDP Agent needs to be Shutdown
+ * False if LLDP Agent needs to be Stopped
+ * @persist: True if Stop/Shutdown of LLDP Agent needs to be persistent across
+ * reboots
+ * @cd: pointer to command details structure or NULL
+ *
+ * Stop or Shutdown the embedded LLDP Agent (0x0A05)
+ */
+int
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_lldp_stop *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.lldp_stop;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_stop);
+
+ if (shutdown_lldp_agent)
+ cmd->command |= ICE_AQ_LLDP_AGENT_SHUTDOWN;
+
+ if (persist)
+ cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_DIS;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_start_lldp
+ * @hw: pointer to the HW struct
+ * @persist: True if Start of LLDP Agent needs to be persistent across reboots
+ * @cd: pointer to command details structure or NULL
+ *
+ * Start the embedded LLDP Agent on all ports. (0x0A06)
+ */
+int ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_lldp_start *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.lldp_start;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_start);
+
+ cmd->command = ICE_AQ_LLDP_AGENT_START;
+
+ if (persist)
+ cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_ENA;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_get_dcbx_status
+ * @hw: pointer to the HW struct
+ *
+ * Get the DCBX status from the Firmware
+ */
+static u8 ice_get_dcbx_status(struct ice_hw *hw)
+{
+ u32 reg;
+
+ reg = rd32(hw, PRTDCB_GENS);
+ return (u8)((reg & PRTDCB_GENS_DCBX_STATUS_M) >>
+ PRTDCB_GENS_DCBX_STATUS_S);
+}
+
+/**
+ * ice_parse_ieee_ets_common_tlv
+ * @buf: Data buffer to be parsed for ETS CFG/REC data
+ * @ets_cfg: Container to store parsed data
+ *
+ * Parses the common data of IEEE 802.1Qaz ETS CFG/REC TLV
+ */
+static void
+ice_parse_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg)
+{
+ u8 offset = 0;
+ int i;
+
+ /* Priority Assignment Table (4 octets)
+ * Octets:| 1 | 2 | 3 | 4 |
+ * -----------------------------------------
+ * |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+ * -----------------------------------------
+ * Bits:|7 4|3 0|7 4|3 0|7 4|3 0|7 4|3 0|
+ * -----------------------------------------
+ */
+ for (i = 0; i < 4; i++) {
+ ets_cfg->prio_table[i * 2] =
+ ((buf[offset] & ICE_IEEE_ETS_PRIO_1_M) >>
+ ICE_IEEE_ETS_PRIO_1_S);
+ ets_cfg->prio_table[i * 2 + 1] =
+ ((buf[offset] & ICE_IEEE_ETS_PRIO_0_M) >>
+ ICE_IEEE_ETS_PRIO_0_S);
+ offset++;
+ }
+
+ /* TC Bandwidth Table (8 octets)
+ * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+ * ---------------------------------
+ * |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+ * ---------------------------------
+ *
+ * TSA Assignment Table (8 octets)
+ * Octets:| 9 | 10| 11| 12| 13| 14| 15| 16|
+ * ---------------------------------
+ * |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+ * ---------------------------------
+ */
+ ice_for_each_traffic_class(i) {
+ ets_cfg->tcbwtable[i] = buf[offset];
+ ets_cfg->tsatable[i] = buf[ICE_MAX_TRAFFIC_CLASS + offset++];
+ }
+}
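+
+/* Worked example (illustrative): a first priority-table octet of 0x21
+ * carries pri0 in its high nibble and pri1 in its low nibble, so it
+ * parses to prio_table[0] = 2 and prio_table[1] = 1.
+ */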
+
+/**
+ * ice_parse_ieee_etscfg_tlv
+ * @tlv: IEEE 802.1Qaz ETS CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Parses IEEE 802.1Qaz ETS CFG TLV
+ */
+static void
+ice_parse_ieee_etscfg_tlv(struct ice_lldp_org_tlv *tlv,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ struct ice_dcb_ets_cfg *etscfg;
+ u8 *buf = tlv->tlvinfo;
+
+ /* First Octet post subtype
+ * --------------------------
+ * |will-|CBS | Re- | Max |
+ * |ing | |served| TCs |
+ * --------------------------
+ * |1bit | 1bit|3 bits|3bits|
+ */
+ etscfg = &dcbcfg->etscfg;
+ etscfg->willing = ((buf[0] & ICE_IEEE_ETS_WILLING_M) >>
+ ICE_IEEE_ETS_WILLING_S);
+ etscfg->cbs = ((buf[0] & ICE_IEEE_ETS_CBS_M) >> ICE_IEEE_ETS_CBS_S);
+ etscfg->maxtcs = ((buf[0] & ICE_IEEE_ETS_MAXTC_M) >>
+ ICE_IEEE_ETS_MAXTC_S);
+
+ /* Begin parsing at Priority Assignment Table (offset 1 in buf) */
+ ice_parse_ieee_ets_common_tlv(&buf[1], etscfg);
+}
+
+/**
+ * ice_parse_ieee_etsrec_tlv
+ * @tlv: IEEE 802.1Qaz ETS REC TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Parses IEEE 802.1Qaz ETS REC TLV
+ */
+static void
+ice_parse_ieee_etsrec_tlv(struct ice_lldp_org_tlv *tlv,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ u8 *buf = tlv->tlvinfo;
+
+ /* Begin parsing at Priority Assignment Table (offset 1 in buf) */
+ ice_parse_ieee_ets_common_tlv(&buf[1], &dcbcfg->etsrec);
+}
+
+/**
+ * ice_parse_ieee_pfccfg_tlv
+ * @tlv: IEEE 802.1Qaz PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Parses IEEE 802.1Qaz PFC CFG TLV
+ */
+static void
+ice_parse_ieee_pfccfg_tlv(struct ice_lldp_org_tlv *tlv,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ u8 *buf = tlv->tlvinfo;
+
+ /* ----------------------------------------
+ * |will-|MBC | Re- | PFC | PFC Enable |
+ * |ing | |served| cap | |
+ * -----------------------------------------
+ * |1bit | 1bit|2 bits|4bits| 1 octet |
+ */
+ dcbcfg->pfc.willing = ((buf[0] & ICE_IEEE_PFC_WILLING_M) >>
+ ICE_IEEE_PFC_WILLING_S);
+ dcbcfg->pfc.mbc = ((buf[0] & ICE_IEEE_PFC_MBC_M) >> ICE_IEEE_PFC_MBC_S);
+ dcbcfg->pfc.pfccap = ((buf[0] & ICE_IEEE_PFC_CAP_M) >>
+ ICE_IEEE_PFC_CAP_S);
+ dcbcfg->pfc.pfcena = buf[1];
+}
+
+/**
+ * ice_parse_ieee_app_tlv
+ * @tlv: IEEE 802.1Qaz APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Parses IEEE 802.1Qaz APP PRIO TLV
+ */
+static void
+ice_parse_ieee_app_tlv(struct ice_lldp_org_tlv *tlv,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ u16 offset = 0;
+ u16 typelen;
+ int i = 0;
+ u16 len;
+ u8 *buf;
+
+ typelen = ntohs(tlv->typelen);
+ len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+ buf = tlv->tlvinfo;
+
+	/* Remove sizeof(ouisubtype) and the reserved byte from len;
+	 * the remaining len divided by 3 is the number of APP entries.
+	 */
+ len -= (sizeof(tlv->ouisubtype) + 1);
+
+ /* Move offset to App Priority Table */
+ offset++;
+
+ /* Application Priority Table (3 octets)
+ * Octets:| 1 | 2 | 3 |
+ * -----------------------------------------
+ * |Priority|Rsrvd| Sel | Protocol ID |
+ * -----------------------------------------
+ * Bits:|23 21|20 19|18 16|15 0|
+ * -----------------------------------------
+ */
+ while (offset < len) {
+ dcbcfg->app[i].priority = ((buf[offset] &
+ ICE_IEEE_APP_PRIO_M) >>
+ ICE_IEEE_APP_PRIO_S);
+ dcbcfg->app[i].selector = ((buf[offset] &
+ ICE_IEEE_APP_SEL_M) >>
+ ICE_IEEE_APP_SEL_S);
+ dcbcfg->app[i].prot_id = (buf[offset + 1] << 0x8) |
+ buf[offset + 2];
+ /* Move to next app */
+ offset += 3;
+ i++;
+ if (i >= ICE_DCBX_MAX_APPS)
+ break;
+ }
+
+ dcbcfg->numapps = i;
+}
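+
+/* Worked example (illustrative): an APP TLV carrying two entries has a
+ * value length of 4 (OUI/subtype) + 1 (reserved) + 2 * 3 = 11 octets;
+ * after subtracting the OUI/subtype and reserved byte, len is 6 and the
+ * loop parses entries at offsets 1 and 4, then stops at offset 7.
+ */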
+
+/**
+ * ice_parse_ieee_tlv
+ * @tlv: IEEE 802.1Qaz TLV
+ * @dcbcfg: Local store to update DCBX config data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value
+ */
+static void
+ice_parse_ieee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+ u32 ouisubtype;
+ u8 subtype;
+
+ ouisubtype = ntohl(tlv->ouisubtype);
+ subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >>
+ ICE_LLDP_TLV_SUBTYPE_S);
+ switch (subtype) {
+ case ICE_IEEE_SUBTYPE_ETS_CFG:
+ ice_parse_ieee_etscfg_tlv(tlv, dcbcfg);
+ break;
+ case ICE_IEEE_SUBTYPE_ETS_REC:
+ ice_parse_ieee_etsrec_tlv(tlv, dcbcfg);
+ break;
+ case ICE_IEEE_SUBTYPE_PFC_CFG:
+ ice_parse_ieee_pfccfg_tlv(tlv, dcbcfg);
+ break;
+ case ICE_IEEE_SUBTYPE_APP_PRI:
+ ice_parse_ieee_app_tlv(tlv, dcbcfg);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * ice_parse_cee_pgcfg_tlv
+ * @tlv: CEE DCBX PG CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Parses CEE DCBX PG CFG TLV
+ */
+static void
+ice_parse_cee_pgcfg_tlv(struct ice_cee_feat_tlv *tlv,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ struct ice_dcb_ets_cfg *etscfg;
+ u8 *buf = tlv->tlvinfo;
+ u16 offset = 0;
+ int i;
+
+ etscfg = &dcbcfg->etscfg;
+
+ if (tlv->en_will_err & ICE_CEE_FEAT_TLV_WILLING_M)
+ etscfg->willing = 1;
+
+ etscfg->cbs = 0;
+ /* Priority Group Table (4 octets)
+ * Octets:| 1 | 2 | 3 | 4 |
+ * -----------------------------------------
+ * |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+ * -----------------------------------------
+ * Bits:|7 4|3 0|7 4|3 0|7 4|3 0|7 4|3 0|
+ * -----------------------------------------
+ */
+ for (i = 0; i < 4; i++) {
+ etscfg->prio_table[i * 2] =
+ ((buf[offset] & ICE_CEE_PGID_PRIO_1_M) >>
+ ICE_CEE_PGID_PRIO_1_S);
+ etscfg->prio_table[i * 2 + 1] =
+ ((buf[offset] & ICE_CEE_PGID_PRIO_0_M) >>
+ ICE_CEE_PGID_PRIO_0_S);
+ offset++;
+ }
+
+ /* PG Percentage Table (8 octets)
+ * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+ * ---------------------------------
+ * |pg0|pg1|pg2|pg3|pg4|pg5|pg6|pg7|
+ * ---------------------------------
+ */
+ ice_for_each_traffic_class(i) {
+ etscfg->tcbwtable[i] = buf[offset++];
+
+ if (etscfg->prio_table[i] == ICE_CEE_PGID_STRICT)
+ dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT;
+ else
+ dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+ }
+
+ /* Number of TCs supported (1 octet) */
+ etscfg->maxtcs = buf[offset];
+}
+
+/**
+ * ice_parse_cee_pfccfg_tlv
+ * @tlv: CEE DCBX PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Parses CEE DCBX PFC CFG TLV
+ */
+static void
+ice_parse_cee_pfccfg_tlv(struct ice_cee_feat_tlv *tlv,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ u8 *buf = tlv->tlvinfo;
+
+ if (tlv->en_will_err & ICE_CEE_FEAT_TLV_WILLING_M)
+ dcbcfg->pfc.willing = 1;
+
+ /* ------------------------
+ * | PFC Enable | PFC TCs |
+ * ------------------------
+ * | 1 octet | 1 octet |
+ */
+ dcbcfg->pfc.pfcena = buf[0];
+ dcbcfg->pfc.pfccap = buf[1];
+}
+
+/**
+ * ice_parse_cee_app_tlv
+ * @tlv: CEE DCBX APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Parses CEE DCBX APP PRIO TLV
+ */
+static void
+ice_parse_cee_app_tlv(struct ice_cee_feat_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+ u16 len, typelen, offset = 0;
+ struct ice_cee_app_prio *app;
+ u8 i;
+
+ typelen = ntohs(tlv->hdr.typelen);
+ len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+
+ dcbcfg->numapps = len / sizeof(*app);
+ if (!dcbcfg->numapps)
+ return;
+ if (dcbcfg->numapps > ICE_DCBX_MAX_APPS)
+ dcbcfg->numapps = ICE_DCBX_MAX_APPS;
+
+ for (i = 0; i < dcbcfg->numapps; i++) {
+ u8 up, selector;
+
+ app = (struct ice_cee_app_prio *)(tlv->tlvinfo + offset);
+ for (up = 0; up < ICE_MAX_USER_PRIORITY; up++)
+ if (app->prio_map & BIT(up))
+ break;
+
+ dcbcfg->app[i].priority = up;
+
+ /* Get Selector from lower 2 bits, and convert to IEEE */
+ selector = (app->upper_oui_sel & ICE_CEE_APP_SELECTOR_M);
+ switch (selector) {
+ case ICE_CEE_APP_SEL_ETHTYPE:
+ dcbcfg->app[i].selector = ICE_APP_SEL_ETHTYPE;
+ break;
+ case ICE_CEE_APP_SEL_TCPIP:
+ dcbcfg->app[i].selector = ICE_APP_SEL_TCPIP;
+ break;
+ default:
+ /* Keep selector as it is for unknown types */
+ dcbcfg->app[i].selector = selector;
+ }
+
+ dcbcfg->app[i].prot_id = ntohs(app->protocol);
+ /* Move to next app */
+ offset += sizeof(*app);
+ }
+}
+
+/**
+ * ice_parse_cee_tlv
+ * @tlv: CEE DCBX TLV
+ * @dcbcfg: Local store to update DCBX config data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value
+ */
+static void
+ice_parse_cee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+ struct ice_cee_feat_tlv *sub_tlv;
+ u8 subtype, feat_tlv_count = 0;
+ u16 len, tlvlen, typelen;
+ u32 ouisubtype;
+
+ ouisubtype = ntohl(tlv->ouisubtype);
+ subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >>
+ ICE_LLDP_TLV_SUBTYPE_S);
+ /* Return if not CEE DCBX */
+ if (subtype != ICE_CEE_DCBX_TYPE)
+ return;
+
+ typelen = ntohs(tlv->typelen);
+ tlvlen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+ len = sizeof(tlv->typelen) + sizeof(ouisubtype) +
+ sizeof(struct ice_cee_ctrl_tlv);
+ /* Return if no CEE DCBX Feature TLVs */
+ if (tlvlen <= len)
+ return;
+
+ sub_tlv = (struct ice_cee_feat_tlv *)((char *)tlv + len);
+ while (feat_tlv_count < ICE_CEE_MAX_FEAT_TYPE) {
+ u16 sublen;
+
+ typelen = ntohs(sub_tlv->hdr.typelen);
+ sublen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+ subtype = (u8)((typelen & ICE_LLDP_TLV_TYPE_M) >>
+ ICE_LLDP_TLV_TYPE_S);
+ switch (subtype) {
+ case ICE_CEE_SUBTYPE_PG_CFG:
+ ice_parse_cee_pgcfg_tlv(sub_tlv, dcbcfg);
+ break;
+ case ICE_CEE_SUBTYPE_PFC_CFG:
+ ice_parse_cee_pfccfg_tlv(sub_tlv, dcbcfg);
+ break;
+ case ICE_CEE_SUBTYPE_APP_PRI:
+ ice_parse_cee_app_tlv(sub_tlv, dcbcfg);
+ break;
+ default:
+			return; /* Return on invalid Sub-type */
+ }
+ feat_tlv_count++;
+ /* Move to next sub TLV */
+ sub_tlv = (struct ice_cee_feat_tlv *)
+ ((char *)sub_tlv + sizeof(sub_tlv->hdr.typelen) +
+ sublen);
+ }
+}
+
+/**
+ * ice_parse_org_tlv
+ * @tlv: Organization specific TLV
+ * @dcbcfg: Local store to update DCBX config data
+ *
+ * Dispatches IEEE 802.1Qaz and CEE DCBX TLVs to their parsers based on
+ * the OUI; TLVs with any other OUI are ignored
+ */
+static void
+ice_parse_org_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+ u32 ouisubtype;
+ u32 oui;
+
+ ouisubtype = ntohl(tlv->ouisubtype);
+ oui = ((ouisubtype & ICE_LLDP_TLV_OUI_M) >> ICE_LLDP_TLV_OUI_S);
+ switch (oui) {
+ case ICE_IEEE_8021QAZ_OUI:
+ ice_parse_ieee_tlv(tlv, dcbcfg);
+ break;
+ case ICE_CEE_DCBX_OUI:
+ ice_parse_cee_tlv(tlv, dcbcfg);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * ice_lldp_to_dcb_cfg
+ * @lldpmib: LLDPDU to be parsed
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Parse DCB configuration from the LLDPDU
+ */
+static int ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
+{
+ struct ice_lldp_org_tlv *tlv;
+ u16 offset = 0;
+ int ret = 0;
+ u16 typelen;
+ u16 type;
+ u16 len;
+
+ if (!lldpmib || !dcbcfg)
+ return -EINVAL;
+
+ /* set to the start of LLDPDU */
+ lldpmib += ETH_HLEN;
+ tlv = (struct ice_lldp_org_tlv *)lldpmib;
+ while (1) {
+ typelen = ntohs(tlv->typelen);
+ type = ((typelen & ICE_LLDP_TLV_TYPE_M) >> ICE_LLDP_TLV_TYPE_S);
+ len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+ offset += sizeof(typelen) + len;
+
+ /* END TLV or beyond LLDPDU size */
+ if (type == ICE_TLV_TYPE_END || offset > ICE_LLDPDU_SIZE)
+ break;
+
+ switch (type) {
+ case ICE_TLV_TYPE_ORG:
+ ice_parse_org_tlv(tlv, dcbcfg);
+ break;
+ default:
+ break;
+ }
+
+ /* Move to next TLV */
+ tlv = (struct ice_lldp_org_tlv *)
+ ((char *)tlv + sizeof(tlv->typelen) + len);
+ }
+
+ return ret;
+}
+
+/**
+ * ice_aq_get_dcb_cfg
+ * @hw: pointer to the HW struct
+ * @mib_type: MIB type for the query
+ * @bridgetype: bridge type for the query (remote)
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Query DCB configuration from the firmware
+ */
+int
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ u8 *lldpmib;
+ int ret;
+
+ /* Allocate the LLDPDU */
+ lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
+ if (!lldpmib)
+ return -ENOMEM;
+
+ ret = ice_aq_get_lldp_mib(hw, bridgetype, mib_type, (void *)lldpmib,
+ ICE_LLDPDU_SIZE, NULL, NULL, NULL);
+
+ if (!ret)
+ /* Parse LLDP MIB to get DCB configuration */
+ ret = ice_lldp_to_dcb_cfg(lldpmib, dcbcfg);
+
+ devm_kfree(ice_hw_to_dev(hw), lldpmib);
+
+ return ret;
+}
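+
+/* Usage sketch (illustrative; mirrors how the DCB library calls this):
+ * querying the local MIB of the nearest bridge looks like
+ *
+ *	ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_LOCAL,
+ *				 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
+ *				 &pi->qos_cfg.local_dcbx_cfg);
+ */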
+
+/**
+ * ice_aq_start_stop_dcbx - Start/Stop DCBX service in FW
+ * @hw: pointer to the HW struct
+ * @start_dcbx_agent: True if DCBX Agent needs to be started
+ * False if DCBX Agent needs to be stopped
+ * @dcbx_agent_status: FW indicates back the DCBX agent status
+ * True if DCBX Agent is active
+ * False if DCBX Agent is stopped
+ * @cd: pointer to command details structure or NULL
+ *
+ * Start/Stop the embedded DCBX Agent. Even if this wrapper function
+ * returns 0, the caller must check whether dcbx_agent_status reports the
+ * requested state and react accordingly. (0x0A09)
+ */
+int
+ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
+ bool *dcbx_agent_status, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_lldp_stop_start_specific_agent *cmd;
+ struct ice_aq_desc desc;
+ u16 opcode;
+ int status;
+
+ cmd = &desc.params.lldp_agent_ctrl;
+
+ opcode = ice_aqc_opc_lldp_stop_start_specific_agent;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, opcode);
+
+ if (start_dcbx_agent)
+ cmd->command = ICE_AQC_START_STOP_AGENT_START_DCBX;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+ *dcbx_agent_status = false;
+
+ if (!status &&
+ cmd->command == ICE_AQC_START_STOP_AGENT_START_DCBX)
+ *dcbx_agent_status = true;
+
+ return status;
+}
+
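+/* Minimal usage sketch (illustrative, not lifted from an actual caller):
+ *
+ *	bool active;
+ *	int err = ice_aq_start_stop_dcbx(hw, true, &active, NULL);
+ *
+ *	if (!err && !active)
+ *		return -EIO;	// FW did not start the DCBX agent
+ */
+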
+/**
+ * ice_aq_get_cee_dcb_cfg
+ * @hw: pointer to the HW struct
+ * @buff: response buffer that stores CEE operational configuration
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get CEE DCBX mode operational configuration from firmware (0x0A07)
+ */
+static int
+ice_aq_get_cee_dcb_cfg(struct ice_hw *hw,
+ struct ice_aqc_get_cee_dcb_cfg_resp *buff,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cee_dcb_cfg);
+
+ return ice_aq_send_cmd(hw, &desc, (void *)buff, sizeof(*buff), cd);
+}
+
+/**
+ * ice_aq_set_pfc_mode - Set PFC mode
+ * @hw: pointer to the HW struct
+ * @pfc_mode: value of PFC mode to set
+ * @cd: pointer to command details structure or NULL
+ *
+ * This AQ call configures the PFC mode to DSCP-based PFC or
+ * VLAN-based PFC (0x0303)
+ */
+int ice_aq_set_pfc_mode(struct ice_hw *hw, u8 pfc_mode, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_set_query_pfc_mode *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ if (pfc_mode > ICE_AQC_PFC_DSCP_BASED_PFC)
+ return -EINVAL;
+
+ cmd = &desc.params.set_query_pfc_mode;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_pfc_mode);
+
+ cmd->pfc_mode = pfc_mode;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+ if (status)
+ return status;
+
+ /* FW will write the PFC mode set back into cmd->pfc_mode, but if DCB is
+ * disabled, FW will write back 0 to cmd->pfc_mode. After the AQ has
+ * been executed, check if cmd->pfc_mode is what was requested. If not,
+ * return an error.
+ */
+ if (cmd->pfc_mode != pfc_mode)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
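+/* Usage sketch (illustrative): request DSCP-based PFC; -EOPNOTSUPP from
+ * this wrapper means FW wrote back a different mode, e.g. because DCB is
+ * disabled:
+ *
+ *	err = ice_aq_set_pfc_mode(hw, ICE_AQC_PFC_DSCP_BASED_PFC, NULL);
+ */
+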
+/**
+ * ice_cee_to_dcb_cfg
+ * @cee_cfg: pointer to CEE configuration struct
+ * @pi: port information structure
+ *
+ * Convert CEE configuration from firmware to DCB configuration
+ */
+static void
+ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
+ struct ice_port_info *pi)
+{
+ u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status);
+ u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift, j;
+ u8 i, err, sync, oper, app_index, ice_app_sel_type;
+ u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio);
+ u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift;
+ struct ice_dcbx_cfg *cmp_dcbcfg, *dcbcfg;
+ u16 ice_app_prot_id_type;
+
+ dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
+ dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
+ dcbcfg->tlv_status = tlv_status;
+
+ /* CEE PG data */
+ dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc;
+
+ /* Note that the FW creates the oper_prio_tc nibbles reversed
+ * from those in the CEE Priority Group sub-TLV.
+ */
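+ /* e.g. oper_prio_tc[0] = 0x21 yields prio_table[0] = 1 (low nibble,
+ * priority 0) and prio_table[1] = 2 (high nibble, priority 1)
+ */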
+ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) {
+ dcbcfg->etscfg.prio_table[i * 2] =
+ ((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_0_M) >>
+ ICE_CEE_PGID_PRIO_0_S);
+ dcbcfg->etscfg.prio_table[i * 2 + 1] =
+ ((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_1_M) >>
+ ICE_CEE_PGID_PRIO_1_S);
+ }
+
+ ice_for_each_traffic_class(i) {
+ dcbcfg->etscfg.tcbwtable[i] = cee_cfg->oper_tc_bw[i];
+
+ if (dcbcfg->etscfg.prio_table[i] == ICE_CEE_PGID_STRICT) {
+ /* Map it to next empty TC */
+ dcbcfg->etscfg.prio_table[i] = cee_cfg->oper_num_tc - 1;
+ dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT;
+ } else {
+ dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+ }
+ }
+
+ /* CEE PFC data */
+ dcbcfg->pfc.pfcena = cee_cfg->oper_pfc_en;
+ dcbcfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
+
+ /* CEE APP TLV data */
+ if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING)
+ cmp_dcbcfg = &pi->qos_cfg.desired_dcbx_cfg;
+ else
+ cmp_dcbcfg = &pi->qos_cfg.remote_dcbx_cfg;
+
+ app_index = 0;
+ for (i = 0; i < 3; i++) {
+ if (i == 0) {
+ /* FCoE APP */
+ ice_aqc_cee_status_mask = ICE_AQC_CEE_FCOE_STATUS_M;
+ ice_aqc_cee_status_shift = ICE_AQC_CEE_FCOE_STATUS_S;
+ ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FCOE_M;
+ ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FCOE_S;
+ ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
+ ice_app_prot_id_type = ETH_P_FCOE;
+ } else if (i == 1) {
+ /* iSCSI APP */
+ ice_aqc_cee_status_mask = ICE_AQC_CEE_ISCSI_STATUS_M;
+ ice_aqc_cee_status_shift = ICE_AQC_CEE_ISCSI_STATUS_S;
+ ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_ISCSI_M;
+ ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S;
+ ice_app_sel_type = ICE_APP_SEL_TCPIP;
+ ice_app_prot_id_type = ISCSI_LISTEN_PORT;
+
+ for (j = 0; j < cmp_dcbcfg->numapps; j++) {
+ u16 prot_id = cmp_dcbcfg->app[j].prot_id;
+ u8 sel = cmp_dcbcfg->app[j].selector;
+
+ if (sel == ICE_APP_SEL_TCPIP &&
+ (prot_id == ISCSI_LISTEN_PORT ||
+ prot_id == ICE_APP_PROT_ID_ISCSI_860)) {
+ ice_app_prot_id_type = prot_id;
+ break;
+ }
+ }
+ } else {
+ /* FIP APP */
+ ice_aqc_cee_status_mask = ICE_AQC_CEE_FIP_STATUS_M;
+ ice_aqc_cee_status_shift = ICE_AQC_CEE_FIP_STATUS_S;
+ ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FIP_M;
+ ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FIP_S;
+ ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
+ ice_app_prot_id_type = ETH_P_FIP;
+ }
+
+ status = (tlv_status & ice_aqc_cee_status_mask) >>
+ ice_aqc_cee_status_shift;
+ err = (status & ICE_TLV_STATUS_ERR) ? 1 : 0;
+ sync = (status & ICE_TLV_STATUS_SYNC) ? 1 : 0;
+ oper = (status & ICE_TLV_STATUS_OPER) ? 1 : 0;
+ /* Add FCoE/iSCSI/FIP APP if Error is False and
+ * Oper/Sync is True
+ */
+ if (!err && sync && oper) {
+ dcbcfg->app[app_index].priority =
+ (app_prio & ice_aqc_cee_app_mask) >>
+ ice_aqc_cee_app_shift;
+ dcbcfg->app[app_index].selector = ice_app_sel_type;
+ dcbcfg->app[app_index].prot_id = ice_app_prot_id_type;
+ app_index++;
+ }
+ }
+
+ dcbcfg->numapps = app_index;
+}
+
+/**
+ * ice_get_ieee_or_cee_dcb_cfg
+ * @pi: port information structure
+ * @dcbx_mode: mode of DCBX (IEEE or CEE)
+ *
+ * Get IEEE or CEE mode DCB configuration from the Firmware
+ */
+static int ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode)
+{
+ struct ice_dcbx_cfg *dcbx_cfg = NULL;
+ int ret;
+
+ if (!pi)
+ return -EINVAL;
+
+ if (dcbx_mode == ICE_DCBX_MODE_IEEE)
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+ else if (dcbx_mode == ICE_DCBX_MODE_CEE)
+ dcbx_cfg = &pi->qos_cfg.desired_dcbx_cfg;
+
+ /* Get Local DCB Config in case of ICE_DCBX_MODE_IEEE
+ * or get CEE DCB Desired Config in case of ICE_DCBX_MODE_CEE
+ */
+ ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_LOCAL,
+ ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
+ if (ret)
+ goto out;
+
+ /* Get Remote DCB Config */
+ dcbx_cfg = &pi->qos_cfg.remote_dcbx_cfg;
+ ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+ ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
+ /* Don't treat ENOENT as an error for Remote MIBs */
+ if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/**
+ * ice_get_dcb_cfg
+ * @pi: port information structure
+ *
+ * Get DCB configuration from the Firmware
+ */
+int ice_get_dcb_cfg(struct ice_port_info *pi)
+{
+ struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg;
+ struct ice_dcbx_cfg *dcbx_cfg;
+ int ret;
+
+ if (!pi)
+ return -EINVAL;
+
+ ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL);
+ if (!ret) {
+ /* CEE mode */
+ ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE);
+ ice_cee_to_dcb_cfg(&cee_cfg, pi);
+ } else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
+		/* CEE mode not enabled; try querying IEEE data */
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+ dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
+ ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_IEEE);
+ }
+
+ return ret;
+}
+
+/**
+ * ice_init_dcb
+ * @hw: pointer to the HW struct
+ * @enable_mib_change: enable MIB change event
+ *
+ * Update DCB configuration from the Firmware
+ */
+int ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
+{
+ struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg;
+ int ret = 0;
+
+ if (!hw->func_caps.common_cap.dcb)
+ return -EOPNOTSUPP;
+
+ qos_cfg->is_sw_lldp = true;
+
+ /* Get DCBX status */
+ qos_cfg->dcbx_status = ice_get_dcbx_status(hw);
+
+ if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DONE ||
+ qos_cfg->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS ||
+ qos_cfg->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
+ /* Get current DCBX configuration */
+ ret = ice_get_dcb_cfg(hw->port_info);
+ if (ret)
+ return ret;
+ qos_cfg->is_sw_lldp = false;
+ } else if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS) {
+ return -EBUSY;
+ }
+
+ /* Configure the LLDP MIB change event */
+ if (enable_mib_change) {
+ ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
+ if (ret)
+ qos_cfg->is_sw_lldp = true;
+ }
+
+ return ret;
+}
+
+/**
+ * ice_cfg_lldp_mib_change
+ * @hw: pointer to the HW struct
+ * @ena_mib: enable/disable MIB change event
+ *
+ * Enable or disable the LLDP MIB change event
+ */
+int ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib)
+{
+ struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg;
+ int ret;
+
+ if (!hw->func_caps.common_cap.dcb)
+ return -EOPNOTSUPP;
+
+ /* Get DCBX status */
+ qos_cfg->dcbx_status = ice_get_dcbx_status(hw);
+
+ if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS)
+ return -EBUSY;
+
+ ret = ice_aq_cfg_lldp_mib_change(hw, ena_mib, NULL);
+ if (!ret)
+ qos_cfg->is_sw_lldp = !ena_mib;
+
+ return ret;
+}
+
+/**
+ * ice_add_ieee_ets_common_tlv
+ * @buf: Data buffer to be populated with ice_dcb_ets_cfg data
+ * @ets_cfg: Container for ice_dcb_ets_cfg data
+ *
+ * Populate the TLV buffer with ice_dcb_ets_cfg data
+ */
+static void
+ice_add_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg)
+{
+ u8 priority0, priority1;
+ u8 offset = 0;
+ int i;
+
+ /* Priority Assignment Table (4 octets)
+ * Octets:| 1 | 2 | 3 | 4 |
+ * -----------------------------------------
+ * |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+ * -----------------------------------------
+ * Bits:|7 4|3 0|7 4|3 0|7 4|3 0|7 4|3 0|
+ * -----------------------------------------
+ */
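+ /* e.g. prio_table = {1, 0, ...} packs as buf[0] = (1 << 4) | 0 = 0x10,
+ * i.e. the even-indexed priority lands in the high nibble
+ */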
+ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) {
+ priority0 = ets_cfg->prio_table[i * 2] & 0xF;
+ priority1 = ets_cfg->prio_table[i * 2 + 1] & 0xF;
+ buf[offset] = (priority0 << ICE_IEEE_ETS_PRIO_1_S) | priority1;
+ offset++;
+ }
+
+ /* TC Bandwidth Table (8 octets)
+ * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+ * ---------------------------------
+ * |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+ * ---------------------------------
+ *
+ * TSA Assignment Table (8 octets)
+ * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+ * ---------------------------------
+ * |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+ * ---------------------------------
+ */
+ ice_for_each_traffic_class(i) {
+ buf[offset] = ets_cfg->tcbwtable[i];
+ buf[ICE_MAX_TRAFFIC_CLASS + offset] = ets_cfg->tsatable[i];
+ offset++;
+ }
+}
+
+/**
+ * ice_add_ieee_ets_tlv - Prepare ETS TLV in IEEE format
+ * @tlv: Fill the ETS config data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS CFG TLV
+ */
+static void
+ice_add_ieee_ets_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+ struct ice_dcb_ets_cfg *etscfg;
+ u8 *buf = tlv->tlvinfo;
+ u8 maxtcwilling = 0;
+ u32 ouisubtype;
+ u16 typelen;
+
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_IEEE_ETS_TLV_LEN);
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_IEEE_SUBTYPE_ETS_CFG);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ /* First Octet post subtype
+ * --------------------------
+ * |will-|CBS | Re- | Max |
+ * |ing | |served| TCs |
+ * --------------------------
+ * |1bit | 1bit|3 bits|3bits|
+ */
+ etscfg = &dcbcfg->etscfg;
+ if (etscfg->willing)
+ maxtcwilling = BIT(ICE_IEEE_ETS_WILLING_S);
+ maxtcwilling |= etscfg->maxtcs & ICE_IEEE_ETS_MAXTC_M;
+ buf[0] = maxtcwilling;
+
+ /* Begin adding at Priority Assignment Table (offset 1 in buf) */
+ ice_add_ieee_ets_common_tlv(&buf[1], etscfg);
+}
+
+/**
+ * ice_add_ieee_etsrec_tlv - Prepare ETS Recommended TLV in IEEE format
+ * @tlv: Fill ETS Recommended TLV in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS REC TLV
+ */
+static void
+ice_add_ieee_etsrec_tlv(struct ice_lldp_org_tlv *tlv,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ struct ice_dcb_ets_cfg *etsrec;
+ u8 *buf = tlv->tlvinfo;
+ u32 ouisubtype;
+ u16 typelen;
+
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_IEEE_ETS_TLV_LEN);
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_IEEE_SUBTYPE_ETS_REC);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ etsrec = &dcbcfg->etsrec;
+
+ /* First Octet is reserved */
+ /* Begin adding at Priority Assignment Table (offset 1 in buf) */
+ ice_add_ieee_ets_common_tlv(&buf[1], etsrec);
+}
+
+/**
+ * ice_add_ieee_pfc_tlv - Prepare PFC TLV in IEEE format
+ * @tlv: Fill PFC TLV in IEEE format
+ * @dcbcfg: Local store which holds the PFC CFG data
+ *
+ * Prepare IEEE 802.1Qaz PFC CFG TLV
+ */
+static void
+ice_add_ieee_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+ u8 *buf = tlv->tlvinfo;
+ u32 ouisubtype;
+ u16 typelen;
+
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_IEEE_PFC_TLV_LEN);
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_IEEE_SUBTYPE_PFC_CFG);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ /* ----------------------------------------
+ * |will-|MBC | Re- | PFC | PFC Enable |
+ * |ing | |served| cap | |
+ * -----------------------------------------
+ * |1bit | 1bit|2 bits|4bits| 1 octet |
+ */
+ if (dcbcfg->pfc.willing)
+ buf[0] = BIT(ICE_IEEE_PFC_WILLING_S);
+
+ if (dcbcfg->pfc.mbc)
+ buf[0] |= BIT(ICE_IEEE_PFC_MBC_S);
+
+ buf[0] |= dcbcfg->pfc.pfccap & 0xF;
+ buf[1] = dcbcfg->pfc.pfcena;
+}
+
+/**
+ * ice_add_ieee_app_pri_tlv - Prepare APP TLV in IEEE format
+ * @tlv: Fill APP TLV in IEEE format
+ * @dcbcfg: Local store which holds the APP CFG data
+ *
+ * Prepare IEEE 802.1Qaz APP CFG TLV
+ */
+static void
+ice_add_ieee_app_pri_tlv(struct ice_lldp_org_tlv *tlv,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ u16 typelen, len, offset = 0;
+ u8 priority, selector, i = 0;
+ u8 *buf = tlv->tlvinfo;
+ u32 ouisubtype;
+
+	/* If there are no APP TLVs, there is nothing to add; just return */
+ if (dcbcfg->numapps == 0)
+ return;
+ ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_IEEE_SUBTYPE_APP_PRI);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ /* Move offset to App Priority Table */
+ offset++;
+ /* Application Priority Table (3 octets)
+ * Octets:| 1 | 2 | 3 |
+ * -----------------------------------------
+ * |Priority|Rsrvd| Sel | Protocol ID |
+ * -----------------------------------------
+ * Bits:|23 21|20 19|18 16|15 0|
+ * -----------------------------------------
+ */
+ while (i < dcbcfg->numapps) {
+ priority = dcbcfg->app[i].priority & 0x7;
+ selector = dcbcfg->app[i].selector & 0x7;
+ buf[offset] = (priority << ICE_IEEE_APP_PRIO_S) | selector;
+ buf[offset + 1] = (dcbcfg->app[i].prot_id >> 0x8) & 0xFF;
+ buf[offset + 2] = dcbcfg->app[i].prot_id & 0xFF;
+ /* Move to next app */
+ offset += 3;
+ i++;
+ if (i >= ICE_DCBX_MAX_APPS)
+ break;
+ }
+ /* len includes size of ouisubtype + 1 reserved + 3*numapps */
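+	/* e.g. two APP entries give len = 4 + 1 + 2 * 3 = 11 octets */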
+ len = sizeof(tlv->ouisubtype) + 1 + (i * 3);
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) | (len & 0x1FF));
+ tlv->typelen = htons(typelen);
+}
+
+/**
+ * ice_add_dscp_up_tlv - Prepare DSCP to UP TLV
+ * @tlv: location to build the TLV data
+ * @dcbcfg: location of data to convert to TLV
+ */
+static void
+ice_add_dscp_up_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+ u8 *buf = tlv->tlvinfo;
+ u32 ouisubtype;
+ u16 typelen;
+ int i;
+
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_DSCP_UP_TLV_LEN);
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_DSCP_SUBTYPE_DSCP2UP);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ /* bytes 0 - 63 - IPv4 DSCP2UP LUT */
+ for (i = 0; i < ICE_DSCP_NUM_VAL; i++) {
+ /* IPv4 mapping */
+ buf[i] = dcbcfg->dscp_map[i];
+ /* IPv6 mapping */
+ buf[i + ICE_DSCP_IPV6_OFFSET] = dcbcfg->dscp_map[i];
+ }
+
+ /* byte 64 - IPv4 untagged traffic */
+ buf[i] = 0;
+
+ /* byte 144 - IPv6 untagged traffic */
+ buf[i + ICE_DSCP_IPV6_OFFSET] = 0;
+}
+
+#define ICE_BYTES_PER_TC 8
+/**
+ * ice_add_dscp_enf_tlv - Prepare DSCP Enforcement TLV
+ * @tlv: location to build the TLV data
+ */
+static void
+ice_add_dscp_enf_tlv(struct ice_lldp_org_tlv *tlv)
+{
+ u8 *buf = tlv->tlvinfo;
+ u32 ouisubtype;
+ u16 typelen;
+
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_DSCP_ENF_TLV_LEN);
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_DSCP_SUBTYPE_ENFORCE);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ /* Allow all DSCP values to be valid for all TC's (IPv4 and IPv6) */
+ memset(buf, 0, 2 * (ICE_MAX_TRAFFIC_CLASS * ICE_BYTES_PER_TC));
+}
+
+/**
+ * ice_add_dscp_tc_bw_tlv - Prepare DSCP BW for TC TLV
+ * @tlv: location to build the TLV data
+ * @dcbcfg: location of the data to convert to TLV
+ */
+static void
+ice_add_dscp_tc_bw_tlv(struct ice_lldp_org_tlv *tlv,
+ struct ice_dcbx_cfg *dcbcfg)
+{
+ struct ice_dcb_ets_cfg *etscfg;
+ u8 *buf = tlv->tlvinfo;
+ u32 ouisubtype;
+ u8 offset = 0;
+ u16 typelen;
+ int i;
+
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_DSCP_TC_BW_TLV_LEN);
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_DSCP_SUBTYPE_TCBW);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+	/* First Octet after subtype
+ * ----------------------------
+ * | RSV | CBS | RSV | Max TCs |
+ * | 1b | 1b | 3b | 3b |
+ * ----------------------------
+ */
+ etscfg = &dcbcfg->etscfg;
+ buf[0] = etscfg->maxtcs & ICE_IEEE_ETS_MAXTC_M;
+
+ /* bytes 1 - 4 reserved */
+ offset = 5;
+
+ /* TC BW table
+ * bytes 0 - 7 for TC 0 - 7
+ *
+ * TSA Assignment table
+ * bytes 8 - 15 for TC 0 - 7
+ */
+ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+ buf[offset] = etscfg->tcbwtable[i];
+ buf[offset + ICE_MAX_TRAFFIC_CLASS] = etscfg->tsatable[i];
+ offset++;
+ }
+}
+
+/**
+ * ice_add_dscp_pfc_tlv - Prepare DSCP PFC TLV
+ * @tlv: Fill PFC TLV in IEEE format
+ * @dcbcfg: Local store which holds the PFC CFG data
+ */
+static void
+ice_add_dscp_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+ u8 *buf = tlv->tlvinfo;
+ u32 ouisubtype;
+ u16 typelen;
+
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_DSCP_PFC_TLV_LEN);
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_DSCP_SUBTYPE_PFC);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ buf[0] = dcbcfg->pfc.pfccap & 0xF;
+ buf[1] = dcbcfg->pfc.pfcena;
+}
+
+/**
+ * ice_add_dcb_tlv - Add all IEEE or DSCP TLVs
+ * @tlv: Fill TLV data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ * @tlvid: Type of IEEE TLV
+ *
+ * Add tlv information
+ */
+static void
+ice_add_dcb_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg,
+ u16 tlvid)
+{
+ if (dcbcfg->pfc_mode == ICE_QOS_MODE_VLAN) {
+ switch (tlvid) {
+ case ICE_IEEE_TLV_ID_ETS_CFG:
+ ice_add_ieee_ets_tlv(tlv, dcbcfg);
+ break;
+ case ICE_IEEE_TLV_ID_ETS_REC:
+ ice_add_ieee_etsrec_tlv(tlv, dcbcfg);
+ break;
+ case ICE_IEEE_TLV_ID_PFC_CFG:
+ ice_add_ieee_pfc_tlv(tlv, dcbcfg);
+ break;
+ case ICE_IEEE_TLV_ID_APP_PRI:
+ ice_add_ieee_app_pri_tlv(tlv, dcbcfg);
+ break;
+ default:
+ break;
+ }
+ } else {
+ /* pfc_mode == ICE_QOS_MODE_DSCP */
+ switch (tlvid) {
+ case ICE_TLV_ID_DSCP_UP:
+ ice_add_dscp_up_tlv(tlv, dcbcfg);
+ break;
+ case ICE_TLV_ID_DSCP_ENF:
+ ice_add_dscp_enf_tlv(tlv);
+ break;
+ case ICE_TLV_ID_DSCP_TC_BW:
+ ice_add_dscp_tc_bw_tlv(tlv, dcbcfg);
+ break;
+ case ICE_TLV_ID_DSCP_TO_PFC:
+ ice_add_dscp_pfc_tlv(tlv, dcbcfg);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/**
+ * ice_dcb_cfg_to_lldp - Convert DCB configuration to MIB format
+ * @lldpmib: pointer to the LLDP MIB buffer to populate
+ * @miblen: length of LLDP MIB
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Convert the DCB configuration to MIB format
+ */
+static void
+ice_dcb_cfg_to_lldp(u8 *lldpmib, u16 *miblen, struct ice_dcbx_cfg *dcbcfg)
+{
+ u16 len, offset = 0, tlvid = ICE_TLV_ID_START;
+ struct ice_lldp_org_tlv *tlv;
+ u16 typelen;
+
+ tlv = (struct ice_lldp_org_tlv *)lldpmib;
+ while (1) {
+ ice_add_dcb_tlv(tlv, dcbcfg, tlvid++);
+ typelen = ntohs(tlv->typelen);
+ len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+ if (len)
+ offset += len + 2;
+ /* END TLV or beyond LLDPDU size */
+ if (tlvid >= ICE_TLV_ID_END_OF_LLDPPDU ||
+ offset > ICE_LLDPDU_SIZE)
+ break;
+ /* Move to next TLV */
+ if (len)
+ tlv = (struct ice_lldp_org_tlv *)
+ ((char *)tlv + sizeof(tlv->typelen) + len);
+ }
+ *miblen = offset;
+}
+
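+/* Worked example of the assembly above, assuming VLAN PFC mode and no APP
+ * entries: the loop emits ETS CFG (TLV length 25 + 2 byte header),
+ * ETS REC (25 + 2) and PFC (6 + 2) TLVs, then stops at
+ * ICE_TLV_ID_END_OF_LLDPPDU, leaving *miblen = 27 + 27 + 8 = 62.
+ */
+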
+/**
+ * ice_set_dcb_cfg - Set the local LLDP MIB to FW
+ * @pi: port information structure
+ *
+ * Set DCB configuration to the Firmware
+ */
+int ice_set_dcb_cfg(struct ice_port_info *pi)
+{
+ u8 mib_type, *lldpmib = NULL;
+ struct ice_dcbx_cfg *dcbcfg;
+ struct ice_hw *hw;
+ u16 miblen;
+ int ret;
+
+ if (!pi)
+ return -EINVAL;
+
+ hw = pi->hw;
+
+ /* update the HW local config */
+ dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
+ /* Allocate the LLDPDU */
+ lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
+ if (!lldpmib)
+ return -ENOMEM;
+
+ mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
+ if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING)
+ mib_type |= SET_LOCAL_MIB_TYPE_CEE_NON_WILLING;
+
+ ice_dcb_cfg_to_lldp(lldpmib, &miblen, dcbcfg);
+ ret = ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, miblen,
+ NULL);
+
+ devm_kfree(ice_hw_to_dev(hw), lldpmib);
+
+ return ret;
+}
+
+/**
+ * ice_aq_query_port_ets - query port ETS configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * query current port ETS configuration
+ */
+static int
+ice_aq_query_port_ets(struct ice_port_info *pi,
+ struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_query_port_ets *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ if (!pi)
+ return -EINVAL;
+ cmd = &desc.params.port_ets;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
+ cmd->port_teid = pi->root->info.node_teid;
+
+ status = ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
+ return status;
+}
+
+/**
+ * ice_update_port_tc_tree_cfg - update TC tree configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ *
+ * update the SW DB with the new TC changes
+ */
+static int
+ice_update_port_tc_tree_cfg(struct ice_port_info *pi,
+ struct ice_aqc_port_ets_elem *buf)
+{
+ struct ice_sched_node *node, *tc_node;
+ struct ice_aqc_txsched_elem_data elem;
+ u32 teid1, teid2;
+ int status = 0;
+ u8 i, j;
+
+ if (!pi)
+ return -EINVAL;
+ /* suspend the missing TC nodes */
+ for (i = 0; i < pi->root->num_children; i++) {
+ teid1 = le32_to_cpu(pi->root->children[i]->info.node_teid);
+ ice_for_each_traffic_class(j) {
+ teid2 = le32_to_cpu(buf->tc_node_teid[j]);
+ if (teid1 == teid2)
+ break;
+ }
+ if (j < ICE_MAX_TRAFFIC_CLASS)
+ continue;
+ /* TC is missing */
+ pi->root->children[i]->in_use = false;
+ }
+ /* add the new TC nodes */
+ ice_for_each_traffic_class(j) {
+ teid2 = le32_to_cpu(buf->tc_node_teid[j]);
+ if (teid2 == ICE_INVAL_TEID)
+ continue;
+		/* Is it already present in the tree? */
+ for (i = 0; i < pi->root->num_children; i++) {
+ tc_node = pi->root->children[i];
+ if (!tc_node)
+ continue;
+ teid1 = le32_to_cpu(tc_node->info.node_teid);
+ if (teid1 == teid2) {
+ tc_node->tc_num = j;
+ tc_node->in_use = true;
+ break;
+ }
+ }
+ if (i < pi->root->num_children)
+ continue;
+ /* new TC */
+ status = ice_sched_query_elem(pi->hw, teid2, &elem);
+ if (!status)
+ status = ice_sched_add_node(pi, 1, &elem);
+ if (status)
+ break;
+ /* update the TC number */
+ node = ice_sched_find_node_by_teid(pi->root, teid2);
+ if (node)
+ node->tc_num = j;
+ }
+ return status;
+}
+
+/**
+ * ice_query_port_ets - query port ETS configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * query current port ETS configuration and update the
+ * SW DB with the TC changes
+ */
+int
+ice_query_port_ets(struct ice_port_info *pi,
+ struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+ struct ice_sq_cd *cd)
+{
+ int status;
+
+ mutex_lock(&pi->sched_lock);
+ status = ice_aq_query_port_ets(pi, buf, buf_size, cd);
+ if (!status)
+ status = ice_update_port_tc_tree_cfg(pi, buf);
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h
new file mode 100644
index 000000000..6abf28a14
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.h
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_H_
+#define _ICE_DCB_H_
+
+#include "ice_type.h"
+#include <scsi/iscsi_proto.h>
+
+#define ICE_DCBX_STATUS_NOT_STARTED 0
+#define ICE_DCBX_STATUS_IN_PROGRESS 1
+#define ICE_DCBX_STATUS_DONE 2
+#define ICE_DCBX_STATUS_DIS 7
+
+#define ICE_TLV_TYPE_END 0
+#define ICE_TLV_TYPE_ORG 127
+
+#define ICE_IEEE_8021QAZ_OUI 0x0080C2
+#define ICE_IEEE_SUBTYPE_ETS_CFG 9
+#define ICE_IEEE_SUBTYPE_ETS_REC 10
+#define ICE_IEEE_SUBTYPE_PFC_CFG 11
+#define ICE_IEEE_SUBTYPE_APP_PRI 12
+
+#define ICE_CEE_DCBX_OUI 0x001B21
+#define ICE_CEE_DCBX_TYPE 2
+
+#define ICE_DSCP_OUI 0xFFFFFF
+#define ICE_DSCP_SUBTYPE_DSCP2UP 0x41
+#define ICE_DSCP_SUBTYPE_ENFORCE 0x42
+#define ICE_DSCP_SUBTYPE_TCBW 0x43
+#define ICE_DSCP_SUBTYPE_PFC 0x44
+#define ICE_DSCP_IPV6_OFFSET 80
+
+#define ICE_CEE_SUBTYPE_PG_CFG 2
+#define ICE_CEE_SUBTYPE_PFC_CFG 3
+#define ICE_CEE_SUBTYPE_APP_PRI 4
+#define ICE_CEE_MAX_FEAT_TYPE 3
+/* Defines for LLDP TLV header */
+#define ICE_LLDP_TLV_LEN_S 0
+#define ICE_LLDP_TLV_LEN_M (0x01FF << ICE_LLDP_TLV_LEN_S)
+#define ICE_LLDP_TLV_TYPE_S 9
+#define ICE_LLDP_TLV_TYPE_M (0x7F << ICE_LLDP_TLV_TYPE_S)
+#define ICE_LLDP_TLV_SUBTYPE_S 0
+#define ICE_LLDP_TLV_SUBTYPE_M (0xFF << ICE_LLDP_TLV_SUBTYPE_S)
+#define ICE_LLDP_TLV_OUI_S 8
+#define ICE_LLDP_TLV_OUI_M (0xFFFFFFUL << ICE_LLDP_TLV_OUI_S)
+
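+/* e.g. for the IEEE ETS CFG TLV the fields above pack as
+ * ouisubtype = (ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ *		ICE_IEEE_SUBTYPE_ETS_CFG = 0x0080C209
+ */
+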
+/* Defines for IEEE ETS TLV */
+#define ICE_IEEE_ETS_MAXTC_S 0
+#define ICE_IEEE_ETS_MAXTC_M (0x7 << ICE_IEEE_ETS_MAXTC_S)
+#define ICE_IEEE_ETS_CBS_S 6
+#define ICE_IEEE_ETS_CBS_M BIT(ICE_IEEE_ETS_CBS_S)
+#define ICE_IEEE_ETS_WILLING_S 7
+#define ICE_IEEE_ETS_WILLING_M BIT(ICE_IEEE_ETS_WILLING_S)
+#define ICE_IEEE_ETS_PRIO_0_S 0
+#define ICE_IEEE_ETS_PRIO_0_M (0x7 << ICE_IEEE_ETS_PRIO_0_S)
+#define ICE_IEEE_ETS_PRIO_1_S 4
+#define ICE_IEEE_ETS_PRIO_1_M (0x7 << ICE_IEEE_ETS_PRIO_1_S)
+#define ICE_CEE_PGID_PRIO_0_S 0
+#define ICE_CEE_PGID_PRIO_0_M (0xF << ICE_CEE_PGID_PRIO_0_S)
+#define ICE_CEE_PGID_PRIO_1_S 4
+#define ICE_CEE_PGID_PRIO_1_M (0xF << ICE_CEE_PGID_PRIO_1_S)
+#define ICE_CEE_PGID_STRICT 15
+
+/* Defines for IEEE TSA types */
+#define ICE_IEEE_TSA_STRICT 0
+#define ICE_IEEE_TSA_ETS 2
+
+/* Defines for IEEE PFC TLV */
+#define ICE_IEEE_PFC_CAP_S 0
+#define ICE_IEEE_PFC_CAP_M (0xF << ICE_IEEE_PFC_CAP_S)
+#define ICE_IEEE_PFC_MBC_S 6
+#define ICE_IEEE_PFC_MBC_M BIT(ICE_IEEE_PFC_MBC_S)
+#define ICE_IEEE_PFC_WILLING_S 7
+#define ICE_IEEE_PFC_WILLING_M BIT(ICE_IEEE_PFC_WILLING_S)
+
+/* Defines for IEEE APP TLV */
+#define ICE_IEEE_APP_SEL_S 0
+#define ICE_IEEE_APP_SEL_M (0x7 << ICE_IEEE_APP_SEL_S)
+#define ICE_IEEE_APP_PRIO_S 5
+#define ICE_IEEE_APP_PRIO_M (0x7 << ICE_IEEE_APP_PRIO_S)
+
+/* TLV definitions for preparing MIB */
+#define ICE_IEEE_TLV_ID_ETS_CFG 3
+#define ICE_IEEE_TLV_ID_ETS_REC 4
+#define ICE_IEEE_TLV_ID_PFC_CFG 5
+#define ICE_IEEE_TLV_ID_APP_PRI 6
+#define ICE_TLV_ID_END_OF_LLDPPDU 7
+#define ICE_TLV_ID_START ICE_IEEE_TLV_ID_ETS_CFG
+#define ICE_TLV_ID_DSCP_UP 3
+#define ICE_TLV_ID_DSCP_ENF 4
+#define ICE_TLV_ID_DSCP_TC_BW 5
+#define ICE_TLV_ID_DSCP_TO_PFC 6
+
+#define ICE_IEEE_ETS_TLV_LEN 25
+#define ICE_IEEE_PFC_TLV_LEN 6
+#define ICE_IEEE_APP_TLV_LEN 11
+
+#define ICE_DSCP_UP_TLV_LEN 148
+#define ICE_DSCP_ENF_TLV_LEN 132
+#define ICE_DSCP_TC_BW_TLV_LEN 25
+#define ICE_DSCP_PFC_TLV_LEN 6
+
+/* IEEE 802.1AB LLDP Organization specific TLV */
+struct ice_lldp_org_tlv {
+ __be16 typelen;
+ __be32 ouisubtype;
+ u8 tlvinfo[];
+} __packed;
+
+struct ice_cee_tlv_hdr {
+ __be16 typelen;
+ u8 operver;
+ u8 maxver;
+};
+
+struct ice_cee_ctrl_tlv {
+ struct ice_cee_tlv_hdr hdr;
+ __be32 seqno;
+ __be32 ackno;
+};
+
+struct ice_cee_feat_tlv {
+ struct ice_cee_tlv_hdr hdr;
+ u8 en_will_err; /* Bits: |En|Will|Err|Reserved(5)| */
+#define ICE_CEE_FEAT_TLV_ENA_M 0x80
+#define ICE_CEE_FEAT_TLV_WILLING_M 0x40
+#define ICE_CEE_FEAT_TLV_ERR_M 0x20
+ u8 subtype;
+ u8 tlvinfo[];
+};
+
+struct ice_cee_app_prio {
+ __be16 protocol;
+ u8 upper_oui_sel; /* Bits: |Upper OUI(6)|Selector(2)| */
+#define ICE_CEE_APP_SELECTOR_M 0x03
+ __be16 lower_oui;
+ u8 prio_map;
+} __packed;
+
+int ice_aq_set_pfc_mode(struct ice_hw *hw, u8 pfc_mode, struct ice_sq_cd *cd);
+int
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+ struct ice_dcbx_cfg *dcbcfg);
+int ice_get_dcb_cfg(struct ice_port_info *pi);
+int ice_set_dcb_cfg(struct ice_port_info *pi);
+int ice_init_dcb(struct ice_hw *hw, bool enable_mib_change);
+int
+ice_query_port_ets(struct ice_port_info *pi,
+ struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+ struct ice_sq_cd *cmd_details);
+#ifdef CONFIG_DCB
+int
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
+ struct ice_sq_cd *cd);
+int ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd);
+int
+ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
+ bool *dcbx_agent_status, struct ice_sq_cd *cd);
+int ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib);
+#else /* CONFIG_DCB */
+static inline int
+ice_aq_stop_lldp(struct ice_hw __always_unused *hw,
+ bool __always_unused shutdown_lldp_agent,
+ bool __always_unused persist,
+ struct ice_sq_cd __always_unused *cd)
+{
+ return 0;
+}
+
+static inline int
+ice_aq_start_lldp(struct ice_hw __always_unused *hw,
+ bool __always_unused persist,
+ struct ice_sq_cd __always_unused *cd)
+{
+ return 0;
+}
+
+static inline int
+ice_aq_start_stop_dcbx(struct ice_hw __always_unused *hw,
+ bool __always_unused start_dcbx_agent,
+ bool *dcbx_agent_status,
+ struct ice_sq_cd __always_unused *cd)
+{
+ *dcbx_agent_status = false;
+
+ return 0;
+}
+
+static inline int
+ice_cfg_lldp_mib_change(struct ice_hw __always_unused *hw,
+ bool __always_unused ena_mib)
+{
+ return 0;
+}
+
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
new file mode 100644
index 000000000..9aa0437aa
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -0,0 +1,1048 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+
+/**
+ * ice_dcb_get_ena_tc - return bitmap of enabled TCs
+ * @dcbcfg: DCB config to evaluate for enabled TCs
+ */
+static u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
+{
+ u8 i, num_tc, ena_tc = 1;
+
+ num_tc = ice_dcb_get_num_tc(dcbcfg);
+
+ for (i = 0; i < num_tc; i++)
+ ena_tc |= BIT(i);
+
+ return ena_tc;
+}
+
+/**
+ * ice_is_pfc_causing_hung_q
+ * @pf: pointer to PF structure
+ * @txqueue: Tx queue which is supposedly hung queue
+ *
+ * Find whether PFC is causing the hung queue; return true if so, else false
+ */
+bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue)
+{
+ u8 num_tcs = 0, i, tc, up_mapped_tc, up_in_tc = 0;
+ u64 ref_prio_xoff[ICE_MAX_UP];
+ struct ice_vsi *vsi;
+ u32 up2tc;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return false;
+
+ ice_for_each_traffic_class(i)
+ if (vsi->tc_cfg.ena_tc & BIT(i))
+ num_tcs++;
+
+	/* first find the TC to which the hung queue belongs */
+ for (tc = 0; tc < num_tcs - 1; tc++)
+ if (ice_find_q_in_range(vsi->tc_cfg.tc_info[tc].qoffset,
+ vsi->tc_cfg.tc_info[tc + 1].qoffset,
+ txqueue))
+ break;
+
+	/* Build a bitmap of all UPs associated with the suspect hung queue's
+	 * TC, so that their counters can be checked for increments.
+	 */
+ up2tc = rd32(&pf->hw, PRTDCB_TUP2TC);
+ for (i = 0; i < ICE_MAX_UP; i++) {
+ up_mapped_tc = (up2tc >> (i * 3)) & 0x7;
+ if (up_mapped_tc == tc)
+ up_in_tc |= BIT(i);
+ }
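+	/* e.g. up2tc = 0x9 maps UP0 and UP1 to TC1 (3 bits per UP), so a
+	 * hung queue in TC1 gives up_in_tc = 0x3
+	 */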
+
+	/* Even though the hung queue is PFC enabled, the Tx timeout can still
+	 * be legitimate. To make sure it is really caused by a PFC storm,
+	 * check whether the XOFF counters are incrementing.
+	 */
+ for (i = 0; i < ICE_MAX_UP; i++)
+ if (up_in_tc & BIT(i))
+ ref_prio_xoff[i] = pf->stats.priority_xoff_rx[i];
+
+ ice_update_dcb_stats(pf);
+
+ for (i = 0; i < ICE_MAX_UP; i++)
+ if (up_in_tc & BIT(i))
+ if (pf->stats.priority_xoff_rx[i] > ref_prio_xoff[i])
+ return true;
+
+ return false;
+}
+
+/**
+ * ice_dcb_get_mode - gets the DCB mode
+ * @port_info: pointer to port info structure
+ * @host: if set it's HOST, if not it's MANAGED
+ */
+static u8 ice_dcb_get_mode(struct ice_port_info *port_info, bool host)
+{
+ u8 mode;
+
+ if (host)
+ mode = DCB_CAP_DCBX_HOST;
+ else
+ mode = DCB_CAP_DCBX_LLD_MANAGED;
+
+ if (port_info->qos_cfg.local_dcbx_cfg.dcbx_mode & ICE_DCBX_MODE_CEE)
+ return mode | DCB_CAP_DCBX_VER_CEE;
+ else
+ return mode | DCB_CAP_DCBX_VER_IEEE;
+}
+
+/**
+ * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
+ * @dcbcfg: config to retrieve number of TCs from
+ */
+u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
+{
+ bool tc_unused = false;
+ u8 num_tc = 0;
+ u8 ret = 0;
+ int i;
+
+ /* Scan the ETS Config Priority Table to find traffic classes
+ * enabled and create a bitmask of enabled TCs
+ */
+ for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+ num_tc |= BIT(dcbcfg->etscfg.prio_table[i]);
+
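+	/* e.g. prio_table = {0, 0, 1, 1, 2, 2, 0, 0} sets num_tc = 0x7 and
+	 * yields 3 contiguous TCs; a bitmap with a gap, say 0x5, is treated
+	 * as non-contiguous below and falls back to 1 TC
+	 */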
+ /* Scan bitmask for contiguous TCs starting with TC0 */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (num_tc & BIT(i)) {
+ if (!tc_unused) {
+ ret++;
+ } else {
+ pr_err("Non-contiguous TCs - Disabling DCB\n");
+ return 1;
+ }
+ } else {
+ tc_unused = true;
+ }
+ }
+
+ /* There is always at least 1 TC */
+ if (!ret)
+ ret = 1;
+
+ return ret;
+}
+
+/**
+ * ice_get_first_droptc - returns the number of the first drop TC
+ * @vsi: used to find the first drop TC
+ *
+ * This function returns the value of the first drop TC.
+ * When DCB is enabled, the first drop TC is derived from enabled_tc and
+ * the PFC enabled bits; otherwise this function returns 0, since TC0 is
+ * the only TC without DCB
+ */
+static u8 ice_get_first_droptc(struct ice_vsi *vsi)
+{
+ struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ u8 num_tc, ena_tc_map, pfc_ena_map;
+ u8 i;
+
+ num_tc = ice_dcb_get_num_tc(cfg);
+
+ /* get bitmap of enabled TCs */
+ ena_tc_map = ice_dcb_get_ena_tc(cfg);
+
+ /* get bitmap of PFC enabled TCs */
+ pfc_ena_map = cfg->pfc.pfcena;
+
+ /* get first TC that is not PFC enabled */
+ for (i = 0; i < num_tc; i++) {
+ if ((ena_tc_map & BIT(i)) && (!(pfc_ena_map & BIT(i)))) {
+ dev_dbg(dev, "first drop tc = %d\n", i);
+ return i;
+ }
+ }
+
+ dev_dbg(dev, "first drop tc = 0\n");
+ return 0;
+}
+
+/**
+ * ice_vsi_set_dcb_tc_cfg - Set VSI's TC based on DCB configuration
+ * @vsi: pointer to the VSI instance
+ */
+void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi)
+{
+ struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg);
+ vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg);
+ break;
+ case ICE_VSI_CHNL:
+ vsi->tc_cfg.ena_tc = BIT(ice_get_first_droptc(vsi));
+ vsi->tc_cfg.numtc = 1;
+ break;
+ case ICE_VSI_CTRL:
+ case ICE_VSI_LB:
+ default:
+ vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
+ vsi->tc_cfg.numtc = 1;
+ }
+}
+
+/**
+ * ice_dcb_get_tc - Get the TC associated with the queue
+ * @vsi: ptr to the VSI
+ * @queue_index: queue number associated with VSI
+ */
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index)
+{
+ return vsi->tx_rings[queue_index]->dcb_tc;
+}
+
+/**
+ * ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC
+ * @vsi: VSI owner of rings being updated
+ */
+void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)
+{
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+ u16 qoffset, qcount;
+ int i, n;
+
+ if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
+ /* Reset the TC information */
+ ice_for_each_txq(vsi, i) {
+ tx_ring = vsi->tx_rings[i];
+ tx_ring->dcb_tc = 0;
+ }
+ ice_for_each_rxq(vsi, i) {
+ rx_ring = vsi->rx_rings[i];
+ rx_ring->dcb_tc = 0;
+ }
+ return;
+ }
+
+ ice_for_each_traffic_class(n) {
+ if (!(vsi->tc_cfg.ena_tc & BIT(n)))
+ break;
+
+ qoffset = vsi->tc_cfg.tc_info[n].qoffset;
+ qcount = vsi->tc_cfg.tc_info[n].qcount_tx;
+ for (i = qoffset; i < (qoffset + qcount); i++)
+ vsi->tx_rings[i]->dcb_tc = n;
+
+ qcount = vsi->tc_cfg.tc_info[n].qcount_rx;
+ for (i = qoffset; i < (qoffset + qcount); i++)
+ vsi->rx_rings[i]->dcb_tc = n;
+ }
+ /* applicable only if "all_enatc" is set, which will be set from
+ * setup_tc method as part of configuring channels
+ */
+ if (vsi->all_enatc) {
+ u8 first_droptc = ice_get_first_droptc(vsi);
+
+ /* When DCB is configured, TC for ADQ queues (which are really
+ * PF queues) should be the first drop TC of the main VSI
+ */
+ ice_for_each_chnl_tc(n) {
+ if (!(vsi->all_enatc & BIT(n)))
+ break;
+
+ qoffset = vsi->mqprio_qopt.qopt.offset[n];
+ qcount = vsi->mqprio_qopt.qopt.count[n];
+ for (i = qoffset; i < (qoffset + qcount); i++) {
+ vsi->tx_rings[i]->dcb_tc = first_droptc;
+ vsi->rx_rings[i]->dcb_tc = first_droptc;
+ }
+ }
+ }
+}
+
+/**
+ * ice_dcb_ena_dis_vsi - disable certain VSIs for DCB config/reconfig
+ * @pf: pointer to the PF instance
+ * @ena: true to enable VSIs, false to disable
+ * @locked: true if caller holds RTNL lock, false otherwise
+ *
+ * Before a new DCB configuration can be applied, VSIs of type PF, SWITCHDEV
+ * and CHNL need to be brought down. Following completion of DCB configuration
+ * the VSIs that were downed need to be brought up again. This helper function
+ * does both.
+ */
+static void ice_dcb_ena_dis_vsi(struct ice_pf *pf, bool ena, bool locked)
+{
+ int i;
+
+ ice_for_each_vsi(pf, i) {
+ struct ice_vsi *vsi = pf->vsi[i];
+
+ if (!vsi)
+ continue;
+
+ switch (vsi->type) {
+ case ICE_VSI_CHNL:
+ case ICE_VSI_SWITCHDEV_CTRL:
+ case ICE_VSI_PF:
+ if (ena)
+ ice_ena_vsi(vsi, locked);
+ else
+ ice_dis_vsi(vsi, locked);
+ break;
+ default:
+ continue;
+ }
+ }
+}
+
+/**
+ * ice_dcb_bwchk - check if ETS bandwidth input parameters are correct
+ * @pf: pointer to the PF struct
+ * @dcbcfg: pointer to DCB config structure
+ */
+int ice_dcb_bwchk(struct ice_pf *pf, struct ice_dcbx_cfg *dcbcfg)
+{
+ struct ice_dcb_ets_cfg *etscfg = &dcbcfg->etscfg;
+ u8 num_tc, total_bw = 0;
+ int i;
+
+	/* ice_dcb_get_num_tc() returns the number of contiguous TCs, or 1 TC
+	 * for non-contiguous TCs, since at least 1 TC has to be configured
+	 */
+ num_tc = ice_dcb_get_num_tc(dcbcfg);
+
+ /* no bandwidth checks required if there's only one TC, so assign
+ * all bandwidth to TC0 and return
+ */
+ if (num_tc == 1) {
+ etscfg->tcbwtable[0] = ICE_TC_MAX_BW;
+ return 0;
+ }
+
+ for (i = 0; i < num_tc; i++)
+ total_bw += etscfg->tcbwtable[i];
+
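+	/* e.g. tcbwtable = {50, 30, 20} across three TCs sums to
+	 * ICE_TC_MAX_BW (100) and passes; {60, 30, 0} sums to 90 and is
+	 * rejected below
+	 */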
+ if (!total_bw) {
+ etscfg->tcbwtable[0] = ICE_TC_MAX_BW;
+ } else if (total_bw != ICE_TC_MAX_BW) {
+ dev_err(ice_pf_to_dev(pf), "Invalid config, total bandwidth must equal 100\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_pf_dcb_cfg - Apply new DCB configuration
+ * @pf: pointer to the PF struct
+ * @new_cfg: DCBX config to apply
+ * @locked: is the RTNL held
+ */
+int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
+{
+ struct ice_aqc_port_ets_elem buf = { 0 };
+ struct ice_dcbx_cfg *old_cfg, *curr_cfg;
+ struct device *dev = ice_pf_to_dev(pf);
+ int ret = ICE_DCB_NO_HW_CHG;
+ struct iidc_event *event;
+ struct ice_vsi *pf_vsi;
+
+ curr_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+ /* FW does not care if change happened */
+ if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
+ ret = ICE_DCB_HW_CHG_RST;
+
+ /* Enable DCB tagging only when more than one TC */
+ if (ice_dcb_get_num_tc(new_cfg) > 1) {
+ dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
+ set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ } else {
+ dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
+ clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ }
+
+ if (!memcmp(new_cfg, curr_cfg, sizeof(*new_cfg))) {
+ dev_dbg(dev, "No change in DCB config required\n");
+ return ret;
+ }
+
+ if (ice_dcb_bwchk(pf, new_cfg))
+ return -EINVAL;
+
+ /* Store old config in case FW config fails */
+ old_cfg = kmemdup(curr_cfg, sizeof(*old_cfg), GFP_KERNEL);
+ if (!old_cfg)
+ return -ENOMEM;
+
+ dev_info(dev, "Commit DCB Configuration to the hardware\n");
+ pf_vsi = ice_get_main_vsi(pf);
+ if (!pf_vsi) {
+ dev_dbg(dev, "PF VSI doesn't exist\n");
+ ret = -EINVAL;
+ goto free_cfg;
+ }
+
+ /* Notify AUX drivers about impending change to TCs */
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event) {
+ ret = -ENOMEM;
+ goto free_cfg;
+ }
+
+ set_bit(IIDC_EVENT_BEFORE_TC_CHANGE, event->type);
+ ice_send_event_to_aux(pf, event);
+ kfree(event);
+
+ /* avoid race conditions by holding the lock while disabling and
+ * re-enabling the VSI
+ */
+ if (!locked)
+ rtnl_lock();
+
+ /* disable VSIs affected by DCB changes */
+ ice_dcb_ena_dis_vsi(pf, false, true);
+
+ memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
+ memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+ memcpy(&new_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+
+ /* Only send new config to HW if we are in SW LLDP mode. Otherwise,
+ * the new config came from the HW in the first place.
+ */
+ if (pf->hw.port_info->qos_cfg.is_sw_lldp) {
+ ret = ice_set_dcb_cfg(pf->hw.port_info);
+ if (ret) {
+ dev_err(dev, "Set DCB Config failed\n");
+ /* Restore previous settings to local config */
+ memcpy(curr_cfg, old_cfg, sizeof(*curr_cfg));
+ goto out;
+ }
+ }
+
+ ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+ if (ret) {
+ dev_err(dev, "Query Port ETS failed\n");
+ goto out;
+ }
+
+ ice_pf_dcb_recfg(pf, false);
+
+out:
+ /* enable previously downed VSIs */
+ ice_dcb_ena_dis_vsi(pf, true, true);
+ if (!locked)
+ rtnl_unlock();
+free_cfg:
+ kfree(old_cfg);
+ return ret;
+}
+
+/**
+ * ice_cfg_etsrec_defaults - Set default ETS recommended DCB config
+ * @pi: port information structure
+ */
+static void ice_cfg_etsrec_defaults(struct ice_port_info *pi)
+{
+ struct ice_dcbx_cfg *dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
+ u8 i;
+
+ /* Ensure ETS recommended DCB configuration is not already set */
+ if (dcbcfg->etsrec.maxtcs)
+ return;
+
+ /* In CEE mode, set the default to 1 TC */
+ dcbcfg->etsrec.maxtcs = 1;
+ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+ dcbcfg->etsrec.tcbwtable[i] = i ? 0 : 100;
+ dcbcfg->etsrec.tsatable[i] = i ? ICE_IEEE_TSA_STRICT :
+ ICE_IEEE_TSA_ETS;
+ }
+}
+
+/**
+ * ice_dcb_need_recfg - Check if DCB needs reconfig
+ * @pf: board private structure
+ * @old_cfg: current DCB config
+ * @new_cfg: new DCB config
+ */
+static bool
+ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+ struct ice_dcbx_cfg *new_cfg)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ bool need_reconfig = false;
+
+ /* Check if ETS configuration has changed */
+ if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
+ sizeof(new_cfg->etscfg))) {
+ /* If Priority Table has changed reconfig is needed */
+ if (memcmp(&new_cfg->etscfg.prio_table,
+ &old_cfg->etscfg.prio_table,
+ sizeof(new_cfg->etscfg.prio_table))) {
+ need_reconfig = true;
+ dev_dbg(dev, "ETS UP2TC changed.\n");
+ }
+
+ if (memcmp(&new_cfg->etscfg.tcbwtable,
+ &old_cfg->etscfg.tcbwtable,
+ sizeof(new_cfg->etscfg.tcbwtable)))
+ dev_dbg(dev, "ETS TC BW Table changed.\n");
+
+ if (memcmp(&new_cfg->etscfg.tsatable,
+ &old_cfg->etscfg.tsatable,
+ sizeof(new_cfg->etscfg.tsatable)))
+ dev_dbg(dev, "ETS TSA Table changed.\n");
+ }
+
+ /* Check if PFC configuration has changed */
+ if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
+ need_reconfig = true;
+ dev_dbg(dev, "PFC config change detected.\n");
+ }
+
+ /* Check if APP Table has changed */
+ if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) {
+ need_reconfig = true;
+ dev_dbg(dev, "APP Table change detected.\n");
+ }
+
+ dev_dbg(dev, "dcb need_reconfig=%d\n", need_reconfig);
+ return need_reconfig;
+}
+
+/**
+ * ice_dcb_rebuild - rebuild DCB post reset
+ * @pf: physical function instance
+ */
+void ice_dcb_rebuild(struct ice_pf *pf)
+{
+ struct ice_aqc_port_ets_elem buf = { 0 };
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_dcbx_cfg *err_cfg;
+ int ret;
+
+ ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+ if (ret) {
+ dev_err(dev, "Query Port ETS failed\n");
+ goto dcb_error;
+ }
+
+ mutex_lock(&pf->tc_mutex);
+
+ if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
+ ice_cfg_etsrec_defaults(pf->hw.port_info);
+
+ ret = ice_set_dcb_cfg(pf->hw.port_info);
+ if (ret) {
+ dev_err(dev, "Failed to set DCB config in rebuild\n");
+ goto dcb_error;
+ }
+
+ if (!pf->hw.port_info->qos_cfg.is_sw_lldp) {
+ ret = ice_cfg_lldp_mib_change(&pf->hw, true);
+ if (ret && !pf->hw.port_info->qos_cfg.is_sw_lldp) {
+ dev_err(dev, "Failed to register for MIB changes\n");
+ goto dcb_error;
+ }
+ }
+
+ dev_info(dev, "DCB info restored\n");
+ ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+ if (ret) {
+ dev_err(dev, "Query Port ETS failed\n");
+ goto dcb_error;
+ }
+
+ mutex_unlock(&pf->tc_mutex);
+
+ return;
+
+dcb_error:
+ dev_err(dev, "Disabling DCB until new settings occur\n");
+ err_cfg = kzalloc(sizeof(*err_cfg), GFP_KERNEL);
+ if (!err_cfg) {
+ mutex_unlock(&pf->tc_mutex);
+ return;
+ }
+
+ err_cfg->etscfg.willing = true;
+ err_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
+ err_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+ memcpy(&err_cfg->etsrec, &err_cfg->etscfg, sizeof(err_cfg->etsrec));
+ /* Coverity warns the return code of ice_pf_dcb_cfg() is not checked
+ * here as is done for other calls to that function. That check is
+ * not necessary since this is in this function's error cleanup path.
+ * Suppress the Coverity warning with the following comment...
+ */
+ /* coverity[check_return] */
+ ice_pf_dcb_cfg(pf, err_cfg, false);
+ kfree(err_cfg);
+
+ mutex_unlock(&pf->tc_mutex);
+}
+
+/**
+ * ice_dcb_init_cfg - set the initial DCB config in SW
+ * @pf: PF to apply config to
+ * @locked: Is the RTNL held
+ */
+static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
+{
+ struct ice_dcbx_cfg *newcfg;
+ struct ice_port_info *pi;
+ int ret = 0;
+
+ pi = pf->hw.port_info;
+ newcfg = kmemdup(&pi->qos_cfg.local_dcbx_cfg, sizeof(*newcfg),
+ GFP_KERNEL);
+ if (!newcfg)
+ return -ENOMEM;
+
+ memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*newcfg));
+
+ dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n");
+ if (ice_pf_dcb_cfg(pf, newcfg, locked))
+ ret = -EINVAL;
+
+ kfree(newcfg);
+
+ return ret;
+}
+
+/**
+ * ice_dcb_sw_dflt_cfg - Apply a default DCB config
+ * @pf: PF to apply config to
+ * @ets_willing: configure ETS willing
+ * @locked: was this function called with RTNL held
+ */
+int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
+{
+ struct ice_aqc_port_ets_elem buf = { 0 };
+ struct ice_dcbx_cfg *dcbcfg;
+ struct ice_port_info *pi;
+ struct ice_hw *hw;
+ int ret;
+
+ hw = &pf->hw;
+ pi = hw->port_info;
+ dcbcfg = kzalloc(sizeof(*dcbcfg), GFP_KERNEL);
+ if (!dcbcfg)
+ return -ENOMEM;
+
+ memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*dcbcfg));
+
+ dcbcfg->etscfg.willing = ets_willing ? 1 : 0;
+ dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
+ dcbcfg->etscfg.tcbwtable[0] = 100;
+ dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+
+ memcpy(&dcbcfg->etsrec, &dcbcfg->etscfg,
+ sizeof(dcbcfg->etsrec));
+ dcbcfg->etsrec.willing = 0;
+
+ dcbcfg->pfc.willing = 1;
+ dcbcfg->pfc.pfccap = hw->func_caps.common_cap.maxtc;
+
+ dcbcfg->numapps = 1;
+ dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE;
+ dcbcfg->app[0].priority = 3;
+ dcbcfg->app[0].prot_id = ETH_P_FCOE;
+
+ ret = ice_pf_dcb_cfg(pf, dcbcfg, locked);
+ kfree(dcbcfg);
+ if (ret)
+ return ret;
+
+ return ice_query_port_ets(pi, &buf, sizeof(buf), NULL);
+}
+
+/**
+ * ice_dcb_tc_contig - Check that TCs are contiguous
+ * @prio_table: pointer to priority table
+ *
+ * Check if TCs begin with TC0 and are contiguous
+ */
+static bool ice_dcb_tc_contig(u8 *prio_table)
+{
+ bool found_empty = false;
+ u8 used_tc = 0;
+ int i;
+
+ /* Create a bitmap of used TCs */
+ for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+ used_tc |= BIT(prio_table[i]);
+
+ for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) {
+ if (used_tc & BIT(i)) {
+ if (found_empty)
+ return false;
+ } else {
+ found_empty = true;
+ }
+ }
+
+ return true;
+}
+
+/**
+ * ice_dcb_noncontig_cfg - Configure DCB for non-contiguous TCs
+ * @pf: pointer to the PF struct
+ *
+ * If non-contiguous TCs, then configure SW DCB with TC0 and ETS non-willing
+ */
+static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
+{
+ struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+ struct device *dev = ice_pf_to_dev(pf);
+ int ret;
+
+ /* Configure SW DCB default with ETS non-willing */
+ ret = ice_dcb_sw_dflt_cfg(pf, false, true);
+ if (ret) {
+ dev_err(dev, "Failed to set local DCB config %d\n", ret);
+ return ret;
+ }
+
+ /* Reconfigure with ETS willing so that FW will send LLDP MIB event */
+ dcbcfg->etscfg.willing = 1;
+ ret = ice_set_dcb_cfg(pf->hw.port_info);
+ if (ret)
+		dev_err(dev, "Failed to set willing DCB config\n");
+
+ return ret;
+}
+
+/**
+ * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
+ * @pf: pointer to the PF struct
+ * @locked: is adev device lock held
+ *
+ * The caller is assumed to have already disabled all VSIs before
+ * calling this function. Reconfigures DCB based on
+ * local_dcbx_cfg.
+ */
+void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked)
+{
+ struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+ struct iidc_event *event;
+ u8 tc_map = 0;
+ int v, ret;
+
+ /* Update each VSI */
+ ice_for_each_vsi(pf, v) {
+ struct ice_vsi *vsi = pf->vsi[v];
+
+ if (!vsi)
+ continue;
+
+ if (vsi->type == ICE_VSI_PF) {
+ tc_map = ice_dcb_get_ena_tc(dcbcfg);
+
+			/* If DCBX requested non-contiguous TCs, then
+			 * configure the default TC
+			 */
+ if (!ice_dcb_tc_contig(dcbcfg->etscfg.prio_table)) {
+ tc_map = ICE_DFLT_TRAFFIC_CLASS;
+ ice_dcb_noncontig_cfg(pf);
+ }
+ } else if (vsi->type == ICE_VSI_CHNL) {
+ tc_map = BIT(ice_get_first_droptc(vsi));
+ } else {
+ tc_map = ICE_DFLT_TRAFFIC_CLASS;
+ }
+
+ ret = ice_vsi_cfg_tc(vsi, tc_map);
+ if (ret) {
+ dev_err(ice_pf_to_dev(pf), "Failed to config TC for VSI index: %d\n",
+ vsi->idx);
+ continue;
+ }
+ /* no need to proceed with remaining cfg if it is CHNL
+ * or switchdev VSI
+ */
+ if (vsi->type == ICE_VSI_CHNL ||
+ vsi->type == ICE_VSI_SWITCHDEV_CTRL)
+ continue;
+
+ ice_vsi_map_rings_to_vectors(vsi);
+ if (vsi->type == ICE_VSI_PF)
+ ice_dcbnl_set_all(vsi);
+ }
+ if (!locked) {
+ /* Notify the AUX drivers that TC change is finished */
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
+ return;
+
+ set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type);
+ ice_send_event_to_aux(pf, event);
+ kfree(event);
+ }
+}
+
+/**
+ * ice_init_pf_dcb - initialize DCB for a PF
+ * @pf: PF to initialize DCB for
+ * @locked: Was function called with RTNL held
+ */
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_port_info *port_info;
+ struct ice_hw *hw = &pf->hw;
+ int err;
+
+ port_info = hw->port_info;
+
+ err = ice_init_dcb(hw, false);
+ if (err && !port_info->qos_cfg.is_sw_lldp) {
+ dev_err(dev, "Error initializing DCB %d\n", err);
+ goto dcb_init_err;
+ }
+
+	dev_info(dev, "DCB is enabled in the hardware, max number of TCs supported on this port is %d\n",
+ pf->hw.func_caps.common_cap.maxtc);
+ if (err) {
+ struct ice_vsi *pf_vsi;
+
+ /* FW LLDP is disabled, activate SW DCBX/LLDP mode */
+ dev_info(dev, "FW LLDP is disabled, DCBx/LLDP in SW mode.\n");
+ clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
+ err = ice_aq_set_pfc_mode(&pf->hw, ICE_AQC_PFC_VLAN_BASED_PFC,
+ NULL);
+ if (err)
+ dev_info(dev, "Failed to set VLAN PFC mode\n");
+
+ err = ice_dcb_sw_dflt_cfg(pf, true, locked);
+ if (err) {
+ dev_err(dev, "Failed to set local DCB config %d\n",
+ err);
+ err = -EIO;
+ goto dcb_init_err;
+ }
+
+ /* If the FW DCBX engine is not running then Rx LLDP packets
+ * need to be redirected up the stack.
+ */
+ pf_vsi = ice_get_main_vsi(pf);
+ if (!pf_vsi) {
+			dev_err(dev, "Failed to get PF VSI\n");
+ err = -EIO;
+ goto dcb_init_err;
+ }
+
+ ice_cfg_sw_lldp(pf_vsi, false, true);
+
+ pf->dcbx_cap = ice_dcb_get_mode(port_info, true);
+ return 0;
+ }
+
+ set_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
+
+ /* DCBX/LLDP enabled in FW, set DCBNL mode advertisement */
+ pf->dcbx_cap = ice_dcb_get_mode(port_info, false);
+
+ err = ice_dcb_init_cfg(pf, locked);
+ if (err)
+ goto dcb_init_err;
+
+ return err;
+
+dcb_init_err:
+ dev_err(dev, "DCB init failed\n");
+ return err;
+}
+
+/**
+ * ice_update_dcb_stats - Update DCB stats counters
+ * @pf: PF whose stats needs to be updated
+ */
+void ice_update_dcb_stats(struct ice_pf *pf)
+{
+ struct ice_hw_port_stats *prev_ps, *cur_ps;
+ struct ice_hw *hw = &pf->hw;
+ u8 port;
+ int i;
+
+ port = hw->port_info->lport;
+ prev_ps = &pf->stats_prev;
+ cur_ps = &pf->stats;
+
+ for (i = 0; i < 8; i++) {
+ ice_stat_update32(hw, GLPRT_PXOFFRXC(port, i),
+ pf->stat_prev_loaded,
+ &prev_ps->priority_xoff_rx[i],
+ &cur_ps->priority_xoff_rx[i]);
+ ice_stat_update32(hw, GLPRT_PXONRXC(port, i),
+ pf->stat_prev_loaded,
+ &prev_ps->priority_xon_rx[i],
+ &cur_ps->priority_xon_rx[i]);
+ ice_stat_update32(hw, GLPRT_PXONTXC(port, i),
+ pf->stat_prev_loaded,
+ &prev_ps->priority_xon_tx[i],
+ &cur_ps->priority_xon_tx[i]);
+ ice_stat_update32(hw, GLPRT_PXOFFTXC(port, i),
+ pf->stat_prev_loaded,
+ &prev_ps->priority_xoff_tx[i],
+ &cur_ps->priority_xoff_tx[i]);
+ ice_stat_update32(hw, GLPRT_RXON2OFFCNT(port, i),
+ pf->stat_prev_loaded,
+ &prev_ps->priority_xon_2_xoff[i],
+ &cur_ps->priority_xon_2_xoff[i]);
+ }
+}
+
+/**
+ * ice_tx_prepare_vlan_flags_dcb - prepare VLAN tagging for DCB
+ * @tx_ring: ring to send buffer on
+ * @first: pointer to struct ice_tx_buf
+ *
+ * This should not be called if the outer VLAN is software offloaded as the VLAN
+ * tag will already be configured with the correct ID and priority bits
+ */
+void
+ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring,
+ struct ice_tx_buf *first)
+{
+ struct sk_buff *skb = first->skb;
+
+ if (!test_bit(ICE_FLAG_DCB_ENA, tx_ring->vsi->back->flags))
+ return;
+
+ /* Insert 802.1p priority into VLAN header */
+ if ((first->tx_flags & ICE_TX_FLAGS_HW_VLAN ||
+ first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) ||
+ skb->priority != TC_PRIO_CONTROL) {
+ first->tx_flags &= ~ICE_TX_FLAGS_VLAN_PR_M;
+ /* Mask the lower 3 bits to set the 802.1p priority */
+ first->tx_flags |= (skb->priority & 0x7) <<
+ ICE_TX_FLAGS_VLAN_PR_S;
+ /* if this is not already set it means a VLAN 0 + priority needs
+ * to be offloaded
+ */
+ if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2)
+ first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN;
+ else
+ first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+ }
+}
+
+/**
+ * ice_dcb_process_lldp_set_mib_change - Process MIB change
+ * @pf: ptr to ice_pf
+ * @event: pointer to the admin queue receive event
+ */
+void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+ struct ice_rq_event_info *event)
+{
+ struct ice_aqc_port_ets_elem buf = { 0 };
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_aqc_lldp_get_mib *mib;
+ struct ice_dcbx_cfg tmp_dcbx_cfg;
+ bool need_reconfig = false;
+ struct ice_port_info *pi;
+ u8 mib_type;
+ int ret;
+
+ /* Not DCB capable or capability disabled */
+ if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+ return;
+
+ if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) {
+ dev_dbg(dev, "MIB Change Event in HOST mode\n");
+ return;
+ }
+
+ pi = pf->hw.port_info;
+ mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw;
+ /* Ignore if event is not for Nearest Bridge */
+ mib_type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) &
+ ICE_AQ_LLDP_BRID_TYPE_M);
+ dev_dbg(dev, "LLDP event MIB bridge type 0x%x\n", mib_type);
+ if (mib_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
+ return;
+
+ /* Check MIB Type and return if event for Remote MIB update */
+ mib_type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M;
+ dev_dbg(dev, "LLDP event mib type %s\n", mib_type ? "remote" : "local");
+ if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
+ /* Update the remote cached instance and return */
+ ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+ ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
+ &pi->qos_cfg.remote_dcbx_cfg);
+ if (ret) {
+ dev_err(dev, "Failed to get remote DCB config\n");
+ return;
+ }
+ }
+
+ mutex_lock(&pf->tc_mutex);
+
+ /* store the old configuration */
+ tmp_dcbx_cfg = pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+ /* Reset the old DCBX configuration data */
+ memset(&pi->qos_cfg.local_dcbx_cfg, 0,
+ sizeof(pi->qos_cfg.local_dcbx_cfg));
+
+ /* Get updated DCBX data from firmware */
+ ret = ice_get_dcb_cfg(pf->hw.port_info);
+ if (ret) {
+ dev_err(dev, "Failed to get DCB config\n");
+ goto out;
+ }
+
+ /* No change detected in DCBX configs */
+ if (!memcmp(&tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg,
+ sizeof(tmp_dcbx_cfg))) {
+ dev_dbg(dev, "No change detected in DCBX configuration.\n");
+ goto out;
+ }
+
+ pf->dcbx_cap = ice_dcb_get_mode(pi, false);
+
+ need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
+ &pi->qos_cfg.local_dcbx_cfg);
+ ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg);
+ if (!need_reconfig)
+ goto out;
+
+ /* Enable DCB tagging only when more than one TC */
+ if (ice_dcb_get_num_tc(&pi->qos_cfg.local_dcbx_cfg) > 1) {
+ dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
+ set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ } else {
+ dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
+ clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ }
+
+ rtnl_lock();
+ /* disable VSIs affected by DCB changes */
+ ice_dcb_ena_dis_vsi(pf, false, true);
+
+ ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+ if (ret) {
+ dev_err(dev, "Query Port ETS failed\n");
+ goto unlock_rtnl;
+ }
+
+ /* reconfigure the VSIs with the updated DCB configuration */
+ ice_pf_dcb_recfg(pf, false);
+
+ /* enable previously downed VSIs */
+ ice_dcb_ena_dis_vsi(pf, true, true);
+unlock_rtnl:
+ rtnl_unlock();
+out:
+ mutex_unlock(&pf->tc_mutex);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
new file mode 100644
index 000000000..800879a88
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_LIB_H_
+#define _ICE_DCB_LIB_H_
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+
+#ifdef CONFIG_DCB
+#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */
+#define ICE_DCB_HW_CHG_RST 0 /* DCB configuration changed with reset */
+#define ICE_DCB_NO_HW_CHG 1 /* DCB configuration did not change */
+#define ICE_DCB_HW_CHG 2 /* DCB configuration changed, no reset */
+
+void ice_dcb_rebuild(struct ice_pf *pf);
+int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked);
+u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
+void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi);
+bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue);
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index);
+int
+ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked);
+int ice_dcb_bwchk(struct ice_pf *pf, struct ice_dcbx_cfg *dcbcfg);
+void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked);
+void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
+void ice_update_dcb_stats(struct ice_pf *pf);
+void
+ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring,
+ struct ice_tx_buf *first);
+void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+ struct ice_rq_event_info *event);
+/**
+ * ice_find_q_in_range - check if a queue is within a TC's queue range
+ * @low: start of the queue range for a TC, i.e. the TC's queue offset
+ * @high: start of the queue range for the next TC
+ * @tx_q: hung_queue/tx_queue
+ *
+ * Returns true if queue 'tx_q' falls between the two offsets of the given TC
+ */
+static inline bool ice_find_q_in_range(u16 low, u16 high, unsigned int tx_q)
+{
+ return (tx_q >= low) && (tx_q < high);
+}
+
+static inline void
+ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc)
+{
+ tlan_ctx->cgd_num = dcb_tc;
+}
+
+static inline bool ice_is_dcb_active(struct ice_pf *pf)
+{
+ return (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags) ||
+ test_bit(ICE_FLAG_DCB_ENA, pf->flags));
+}
+
+static inline u8 ice_get_pfc_mode(struct ice_pf *pf)
+{
+ return pf->hw.port_info->qos_cfg.local_dcbx_cfg.pfc_mode;
+}
+
+#else
+static inline void ice_dcb_rebuild(struct ice_pf *pf) { }
+
+static inline void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi)
+{
+ vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
+ vsi->tc_cfg.numtc = 1;
+}
+
+static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
+{
+ return ICE_DFLT_TRAFFIC_CLASS;
+}
+
+static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
+{
+ return 1;
+}
+
+static inline u8
+ice_dcb_get_tc(struct ice_vsi __always_unused *vsi,
+ int __always_unused queue_index)
+{
+ return 0;
+}
+
+static inline int
+ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked)
+{
+ dev_dbg(ice_pf_to_dev(pf), "DCB not supported\n");
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_pf_dcb_cfg(struct ice_pf __always_unused *pf,
+ struct ice_dcbx_cfg __always_unused *new_cfg,
+ bool __always_unused locked)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void
+ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring __always_unused *tx_ring,
+ struct ice_tx_buf __always_unused *first)
+{
+}
+
+static inline bool ice_is_dcb_active(struct ice_pf __always_unused *pf)
+{
+ return false;
+}
+
+static inline bool
+ice_is_pfc_causing_hung_q(struct ice_pf __always_unused *pf,
+ unsigned int __always_unused txqueue)
+{
+ return false;
+}
+
+static inline u8 ice_get_pfc_mode(struct ice_pf *pf)
+{
+ return 0;
+}
+
+static inline void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked) { }
+static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { }
+static inline void ice_update_dcb_stats(struct ice_pf *pf) { }
+static inline void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { }
+static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { }
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
new file mode 100644
index 000000000..3eb01731e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -0,0 +1,1096 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_dcb.h"
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+#include <net/dcbnl.h>
+
+/**
+ * ice_dcbnl_devreset - perform enough of an ifdown/ifup to sync DCBNL info
+ * @netdev: device associated with interface that needs reset
+ */
+static void ice_dcbnl_devreset(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ while (ice_is_reset_in_progress(pf->state))
+ usleep_range(1000, 2000);
+
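+ /* bounce the interface so the stack and user space pick up the new DCB state */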
+ dev_close(netdev);
+ netdev_state_change(netdev);
+ dev_open(netdev, NULL);
+ netdev_state_change(netdev);
+}
+
+/**
+ * ice_dcbnl_getets - retrieve local ETS configuration
+ * @netdev: the relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets)
+{
+ struct ice_dcbx_cfg *dcbxcfg;
+ struct ice_pf *pf;
+
+ pf = ice_netdev_to_pf(netdev);
+ dcbxcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+ ets->willing = dcbxcfg->etscfg.willing;
+ ets->ets_cap = dcbxcfg->etscfg.maxtcs;
+ ets->cbs = dcbxcfg->etscfg.cbs;
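+ /* a single ETS config is maintained, so Tx and Rx report the same tables */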
+ memcpy(ets->tc_tx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_tx_bw));
+ memcpy(ets->tc_rx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_rx_bw));
+ memcpy(ets->tc_tsa, dcbxcfg->etscfg.tsatable, sizeof(ets->tc_tsa));
+ memcpy(ets->prio_tc, dcbxcfg->etscfg.prio_table, sizeof(ets->prio_tc));
+ memcpy(ets->tc_reco_bw, dcbxcfg->etsrec.tcbwtable,
+ sizeof(ets->tc_reco_bw));
+ memcpy(ets->tc_reco_tsa, dcbxcfg->etsrec.tsatable,
+ sizeof(ets->tc_reco_tsa));
+ memcpy(ets->reco_prio_tc, dcbxcfg->etscfg.prio_table,
+ sizeof(ets->reco_prio_tc));
+
+ return 0;
+}
+
+/**
+ * ice_dcbnl_setets - set IEEE ETS configuration
+ * @netdev: pointer to relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+ int bwcfg = 0, bwrec = 0;
+ int err, i;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+ return -EINVAL;
+
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+ mutex_lock(&pf->tc_mutex);
+
+ new_cfg->etscfg.willing = ets->willing;
+ new_cfg->etscfg.cbs = ets->cbs;
+ ice_for_each_traffic_class(i) {
+ new_cfg->etscfg.tcbwtable[i] = ets->tc_tx_bw[i];
+ bwcfg += ets->tc_tx_bw[i];
+ new_cfg->etscfg.tsatable[i] = ets->tc_tsa[i];
+ if (new_cfg->pfc_mode == ICE_QOS_MODE_VLAN) {
+ /* in DSCP mode up->tc mapping cannot change */
+ new_cfg->etscfg.prio_table[i] = ets->prio_tc[i];
+ new_cfg->etsrec.prio_table[i] = ets->reco_prio_tc[i];
+ }
+ new_cfg->etsrec.tcbwtable[i] = ets->tc_reco_bw[i];
+ bwrec += ets->tc_reco_bw[i];
+ new_cfg->etsrec.tsatable[i] = ets->tc_reco_tsa[i];
+ }
+
+ if (ice_dcb_bwchk(pf, new_cfg)) {
+ err = -EINVAL;
+ goto ets_out;
+ }
+
+ new_cfg->etscfg.maxtcs = pf->hw.func_caps.common_cap.maxtc;
+
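+ /* if no BW was assigned in the config/recommendation, default all BW to TC0 */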
+ if (!bwcfg)
+ new_cfg->etscfg.tcbwtable[0] = 100;
+
+ if (!bwrec)
+ new_cfg->etsrec.tcbwtable[0] = 100;
+
+ err = ice_pf_dcb_cfg(pf, new_cfg, true);
+ /* return of zero indicates new cfg applied */
+ if (err == ICE_DCB_HW_CHG_RST)
+ ice_dcbnl_devreset(netdev);
+ if (err == ICE_DCB_NO_HW_CHG)
+ err = ICE_DCB_HW_CHG_RST;
+
+ets_out:
+ mutex_unlock(&pf->tc_mutex);
+ return err;
+}
+
+/**
+ * ice_dcbnl_getnumtcs - Get max number of traffic classes supported
+ * @dev: pointer to netdev struct
+ * @tcid: TC ID
+ * @num: total number of TCs supported by the adapter
+ *
+ * Return the total number of TCs supported
+ */
+static int
+ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+ if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+ return -EINVAL;
+
+ *num = pf->hw.func_caps.common_cap.maxtc;
+ return 0;
+}
+
+/**
+ * ice_dcbnl_getdcbx - retrieve current DCBX capability
+ * @netdev: pointer to the netdev struct
+ */
+static u8 ice_dcbnl_getdcbx(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ return pf->dcbx_cap;
+}
+
+/**
+ * ice_dcbnl_setdcbx - set required DCBX capability
+ * @netdev: the corresponding netdev
+ * @mode: required mode
+ */
+static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_qos_cfg *qos_cfg;
+
+ /* if FW LLDP agent is running, DCBNL not allowed to change mode */
+ if (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+ return ICE_DCB_NO_HW_CHG;
+
+ /* No support for LLD_MANAGED modes or CEE+IEEE */
+ if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+ ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) ||
+ !(mode & DCB_CAP_DCBX_HOST))
+ return ICE_DCB_NO_HW_CHG;
+
+ /* Already set to the given mode; no change */
+ if (mode == pf->dcbx_cap)
+ return ICE_DCB_NO_HW_CHG;
+
+ qos_cfg = &pf->hw.port_info->qos_cfg;
+
+ /* DSCP configuration is not DCBx negotiated */
+ if (qos_cfg->local_dcbx_cfg.pfc_mode == ICE_QOS_MODE_DSCP)
+ return ICE_DCB_NO_HW_CHG;
+
+ pf->dcbx_cap = mode;
+
+ if (mode & DCB_CAP_DCBX_VER_CEE)
+ qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE;
+ else
+ qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE;
+
+ dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode);
+ return ICE_DCB_HW_CHG_RST;
+}
+
+/**
+ * ice_dcbnl_get_perm_hw_addr - MAC address used by DCBX
+ * @netdev: pointer to netdev struct
+ * @perm_addr: buffer to return permanent MAC address
+ */
+static void ice_dcbnl_get_perm_hw_addr(struct net_device *netdev, u8 *perm_addr)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+ int i, j;
+
+ memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+ for (i = 0; i < netdev->addr_len; i++)
+ perm_addr[i] = pi->mac.perm_addr[i];
+
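+ /* fill the second address slot too; ice has no separate SAN MAC to report */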
+ for (j = 0; j < netdev->addr_len; j++, i++)
+ perm_addr[i] = pi->mac.perm_addr[j];
+}
+
+/**
+ * ice_get_pfc_delay - Retrieve PFC Link Delay
+ * @hw: pointer to HW struct
+ * @delay: holds the PFC Link Delay value
+ */
+static void ice_get_pfc_delay(struct ice_hw *hw, u16 *delay)
+{
+ u32 val;
+
+ val = rd32(hw, PRTDCB_GENC);
+ *delay = (u16)((val & PRTDCB_GENC_PFCLDA_M) >> PRTDCB_GENC_PFCLDA_S);
+}
+
+/**
+ * ice_dcbnl_getpfc - retrieve local IEEE PFC config
+ * @netdev: pointer to netdev struct
+ * @pfc: struct to hold PFC info
+ */
+static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+ struct ice_dcbx_cfg *dcbxcfg;
+ int i;
+
+ dcbxcfg = &pi->qos_cfg.local_dcbx_cfg;
+ pfc->pfc_cap = dcbxcfg->pfc.pfccap;
+ pfc->pfc_en = dcbxcfg->pfc.pfcena;
+ pfc->mbc = dcbxcfg->pfc.mbc;
+ ice_get_pfc_delay(&pf->hw, &pfc->delay);
+
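+ /* per IEEE definitions: requests = PFC frames sent, indications = PFC frames received */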
+ ice_for_each_traffic_class(i) {
+ pfc->requests[i] = pf->stats.priority_xoff_tx[i];
+ pfc->indications[i] = pf->stats.priority_xoff_rx[i];
+ }
+
+ return 0;
+}
+
+/**
+ * ice_dcbnl_setpfc - set local IEEE PFC config
+ * @netdev: pointer to relevant netdev
+ * @pfc: pointer to struct holding PFC config
+ */
+static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+ int err;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+ return -EINVAL;
+
+ mutex_lock(&pf->tc_mutex);
+
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+ if (pfc->pfc_cap)
+ new_cfg->pfc.pfccap = pfc->pfc_cap;
+ else
+ new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+
+ new_cfg->pfc.pfcena = pfc->pfc_en;
+
+ err = ice_pf_dcb_cfg(pf, new_cfg, true);
+ if (err == ICE_DCB_HW_CHG_RST)
+ ice_dcbnl_devreset(netdev);
+ if (err == ICE_DCB_NO_HW_CHG)
+ err = ICE_DCB_HW_CHG_RST;
+ mutex_unlock(&pf->tc_mutex);
+ return err;
+}
+
+/**
+ * ice_dcbnl_get_pfc_cfg - Get CEE PFC config
+ * @netdev: pointer to netdev struct
+ * @prio: corresponding user priority
+ * @setting: the PFC setting for given priority
+ */
+static void
+ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (prio >= ICE_MAX_USER_PRIORITY)
+ return;
+
+ *setting = (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
+ dev_dbg(ice_pf_to_dev(pf), "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n",
+ prio, *setting, pi->qos_cfg.local_dcbx_cfg.pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_set_pfc_cfg - Set CEE PFC config
+ * @netdev: the corresponding netdev
+ * @prio: User Priority
+ * @set: PFC setting to apply
+ */
+static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (prio >= ICE_MAX_USER_PRIORITY)
+ return;
+
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+ new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+ if (set)
+ new_cfg->pfc.pfcena |= BIT(prio);
+ else
+ new_cfg->pfc.pfcena &= ~BIT(prio);
+
+ dev_dbg(ice_pf_to_dev(pf), "Set PFC config UP:%d set:%d pfcena:0x%x\n",
+ prio, set, new_cfg->pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_getpfcstate - get CEE PFC mode
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getpfcstate(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+
+ /* Return enabled if PFC is enabled for any UP */
+ if (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * ice_dcbnl_getstate - get DCB enabled state
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getstate(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ u8 state = 0;
+
+ state = test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+
+ dev_dbg(ice_pf_to_dev(pf), "DCB enabled state = %d\n", state);
+ return state;
+}
+
+/**
+ * ice_dcbnl_setstate - Set CEE DCB state
+ * @netdev: pointer to relevant netdev
+ * @state: state value to set
+ */
+static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 state)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return ICE_DCB_NO_HW_CHG;
+
+ /* Nothing to do */
+ if (!!state == test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+ return ICE_DCB_NO_HW_CHG;
+
+ if (state) {
+ set_bit(ICE_FLAG_DCB_ENA, pf->flags);
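+ /* seed the desired config from the currently active local config */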
+ memcpy(&pf->hw.port_info->qos_cfg.desired_dcbx_cfg,
+ &pf->hw.port_info->qos_cfg.local_dcbx_cfg,
+ sizeof(struct ice_dcbx_cfg));
+ } else {
+ clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ }
+
+ return ICE_DCB_HW_CHG;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_tx - get CEE PG Tx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: traffic priority type
+ * @pgid: the BW group ID the traffic class belongs to
+ * @bw_pct: BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio,
+ u8 __always_unused *prio_type, u8 *pgid,
+ u8 __always_unused *bw_pct,
+ u8 __always_unused *up_map)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (prio >= ICE_MAX_USER_PRIORITY)
+ return;
+
+ *pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio];
+ dev_dbg(ice_pf_to_dev(pf), "Get PG config prio=%d tc=%d\n", prio,
+ *pgid);
+}
+
+/**
+ * ice_dcbnl_set_pg_tc_cfg_tx - set CEE PG Tx config
+ * @netdev: pointer to relevant netdev
+ * @tc: the corresponding traffic class
+ * @prio_type: the traffic priority type
+ * @bwg_id: the BW group ID the TC belongs to
+ * @bw_pct: the BW percentage for the BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+ u8 __always_unused prio_type,
+ u8 __always_unused bwg_id,
+ u8 __always_unused bw_pct, u8 up_map)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+ int i;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (tc >= ICE_MAX_TRAFFIC_CLASS)
+ return;
+
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+ /* prio_type, bwg_id and bw_pct per UP are not supported */
+
+ ice_for_each_traffic_class(i) {
+ if (up_map & BIT(i))
+ new_cfg->etscfg.prio_table[i] = tc;
+ }
+ new_cfg->etscfg.tsatable[tc] = ICE_IEEE_TSA_ETS;
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_tx - Get CEE PG BW config
+ * @netdev: pointer to the netdev struct
+ * @pgid: corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+ return;
+
+ *bw_pct = pi->qos_cfg.local_dcbx_cfg.etscfg.tcbwtable[pgid];
+ dev_dbg(ice_pf_to_dev(pf), "Get PG BW config tc=%d bw_pct=%d\n",
+ pgid, *bw_pct);
+}
+
+/**
+ * ice_dcbnl_set_pg_bwg_cfg_tx - set CEE PG Tx BW config
+ * @netdev: the corresponding netdev
+ * @pgid: Corresponding traffic class
+ * @bw_pct: the BW percentage for the specified TC
+ */
+static void
+ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+ return;
+
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+ new_cfg->etscfg.tcbwtable[pgid] = bw_pct;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_rx - Get CEE PG Rx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: the traffic priority type
+ * @pgid: the PG ID
+ * @bw_pct: the BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio,
+ u8 __always_unused *prio_type, u8 *pgid,
+ u8 __always_unused *bw_pct,
+ u8 __always_unused *up_map)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (prio >= ICE_MAX_USER_PRIORITY)
+ return;
+
+ *pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio];
+}
+
+/**
+ * ice_dcbnl_set_pg_tc_cfg_rx - set CEE PG Rx config (not supported)
+ * @netdev: relevant netdev struct
+ * @prio: corresponding user priority
+ * @prio_type: the traffic priority type
+ * @pgid: the PG ID
+ * @bw_pct: BW percentage for corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ *
+ * lldpad requires this function pointer to be non-NULL to complete CEE config.
+ */
+static void
+ice_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev,
+ int __always_unused prio,
+ u8 __always_unused prio_type,
+ u8 __always_unused pgid,
+ u8 __always_unused bw_pct,
+ u8 __always_unused up_map)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ dev_dbg(ice_pf_to_dev(pf), "Rx TC PG Config Not Supported.\n");
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_rx - Get CEE PG BW Rx config
+ * @netdev: pointer to netdev struct
+ * @pgid: the corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid,
+ u8 *bw_pct)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ *bw_pct = 0;
+}
+
+/**
+ * ice_dcbnl_set_pg_bwg_cfg_rx - set CEE PG Rx BW config (not supported)
+ * @netdev: the corresponding netdev
+ * @pgid: corresponding TC
+ * @bw_pct: BW percentage for given TC
+ *
+ * lldpad requires this function pointer to be non-NULL to complete CEE config.
+ */
+static void
+ice_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid,
+ u8 __always_unused bw_pct)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ dev_dbg(ice_pf_to_dev(pf), "Rx BWG PG Config Not Supported.\n");
+}
+
+/**
+ * ice_dcbnl_get_cap - Get DCBX capabilities of adapter
+ * @netdev: pointer to netdev struct
+ * @capid: the capability type
+ * @cap: the capability value
+ */
+static u8 ice_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+ return ICE_DCB_NO_HW_CHG;
+
+ switch (capid) {
+ case DCB_CAP_ATTR_PG:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_PFC:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_UP2TC:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_PG_TCS:
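+ /* 0x80: bit 7 set indicates support for 8 traffic classes */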
+ *cap = 0x80;
+ break;
+ case DCB_CAP_ATTR_PFC_TCS:
+ *cap = 0x80;
+ break;
+ case DCB_CAP_ATTR_GSP:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_BCN:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_DCBX:
+ *cap = pf->dcbx_cap;
+ break;
+ default:
+ *cap = false;
+ break;
+ }
+
+ dev_dbg(ice_pf_to_dev(pf), "DCBX Get Capability cap=%d capval=0x%x\n",
+ capid, *cap);
+ return 0;
+}
+
+/**
+ * ice_dcbnl_getapp - get CEE APP
+ * @netdev: pointer to netdev struct
+ * @idtype: the App selector
+ * @id: the App ethtype or port number
+ */
+static int ice_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct dcb_app app = {
+ .selector = idtype,
+ .protocol = id,
+ };
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return -EINVAL;
+
+ return dcb_getapp(netdev, &app);
+}
+
+/**
+ * ice_dcbnl_find_app - Search for APP in given DCB config
+ * @cfg: struct to hold DCBX config
+ * @app: struct to hold app data to look for
+ */
+static bool
+ice_dcbnl_find_app(struct ice_dcbx_cfg *cfg,
+ struct ice_dcb_app_priority_table *app)
+{
+ unsigned int i;
+
+ for (i = 0; i < cfg->numapps; i++) {
+ if (app->selector == cfg->app[i].selector &&
+ app->prot_id == cfg->app[i].prot_id &&
+ app->priority == cfg->app[i].priority)
+ return true;
+ }
+
+ return false;
+}
+
+#define ICE_BYTES_PER_DSCP_VAL 8
+
+/**
+ * ice_dcbnl_setapp - set local IEEE App config
+ * @netdev: relevant netdev struct
+ * @app: struct to hold app config info
+ */
+static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcb_app_priority_table new_app;
+ struct ice_dcbx_cfg *old_cfg, *new_cfg;
+ u8 max_tc;
+ int ret;
+
+ /* ONLY DSCP APP TLVs have operational significance */
+ if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP)
+ return -EINVAL;
+
+ /* only allow APP TLVs in SW Mode */
+ if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) {
+ netdev_err(netdev, "can't do DSCP QoS when FW DCB agent active\n");
+ return -EINVAL;
+ }
+
+ if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+ return -EINVAL;
+
+ if (!ice_is_feature_supported(pf, ICE_F_DSCP))
+ return -EOPNOTSUPP;
+
+ if (app->protocol >= ICE_DSCP_NUM_VAL) {
+ netdev_err(netdev, "DSCP value 0x%04X out of range\n",
+ app->protocol);
+ return -EINVAL;
+ }
+
+ max_tc = pf->hw.func_caps.common_cap.maxtc;
+ if (app->priority >= max_tc) {
+ netdev_err(netdev, "TC %d out of range, max TC %d\n",
+ app->priority, max_tc);
+ return -EINVAL;
+ }
+
+ /* grab TC mutex */
+ mutex_lock(&pf->tc_mutex);
+
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+ old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+ ret = dcb_ieee_setapp(netdev, app);
+ if (ret)
+ goto setapp_out;
+
+ if (test_and_set_bit(app->protocol, new_cfg->dscp_mapped)) {
+ netdev_err(netdev, "DSCP value 0x%04X already user mapped\n",
+ app->protocol);
+ ret = dcb_ieee_delapp(netdev, app);
+ if (ret)
+ netdev_err(netdev, "Failed to delete re-mapping TLV\n");
+ ret = -EINVAL;
+ goto setapp_out;
+ }
+
+ new_app.selector = app->selector;
+ new_app.prot_id = app->protocol;
+ new_app.priority = app->priority;
+
+ /* If the port is not yet in DSCP mode, switch it over */
+ if (old_cfg->pfc_mode == ICE_QOS_MODE_VLAN) {
+ int i, j;
+
+ /* set DSCP mode */
+ ret = ice_aq_set_pfc_mode(&pf->hw, ICE_AQC_PFC_DSCP_BASED_PFC,
+ NULL);
+ if (ret) {
+ netdev_err(netdev, "Failed to set DSCP PFC mode %d\n",
+ ret);
+ goto setapp_out;
+ }
+ netdev_info(netdev, "Switched QoS to L3 DSCP mode\n");
+
+ new_cfg->pfc_mode = ICE_QOS_MODE_DSCP;
+
+ /* set default DSCP QoS values */
+ new_cfg->etscfg.willing = 0;
+ new_cfg->pfc.pfccap = max_tc;
+ new_cfg->pfc.willing = 0;
+
+ for (i = 0; i < max_tc; i++)
+ for (j = 0; j < ICE_BYTES_PER_DSCP_VAL; j++) {
+ int dscp, offset;
+
+ dscp = (i * max_tc) + j;
+ offset = max_tc * ICE_BYTES_PER_DSCP_VAL;
+
+ new_cfg->dscp_map[dscp] = i;
+ /* if fewer than 8 TCs are supported */
+ if (max_tc < ICE_MAX_TRAFFIC_CLASS)
+ new_cfg->dscp_map[dscp + offset] = i;
+ }
+
+ new_cfg->etscfg.tcbwtable[0] = 100;
+ new_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+ new_cfg->etscfg.prio_table[0] = 0;
+
+ for (i = 1; i < max_tc; i++) {
+ new_cfg->etscfg.tcbwtable[i] = 0;
+ new_cfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+ new_cfg->etscfg.prio_table[i] = i;
+ }
+ } /* end of switching to DSCP mode */
+
+ /* apply new mapping for this DSCP value */
+ new_cfg->dscp_map[app->protocol] = app->priority;
+ new_cfg->app[new_cfg->numapps++] = new_app;
+
+ ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+ /* return of zero indicates new cfg applied */
+ if (ret == ICE_DCB_HW_CHG_RST)
+ ice_dcbnl_devreset(netdev);
+ else
+ ret = ICE_DCB_NO_HW_CHG;
+
+setapp_out:
+ mutex_unlock(&pf->tc_mutex);
+ return ret;
+}
+
+/**
+ * ice_dcbnl_delapp - Delete local IEEE App config
+ * @netdev: relevant netdev
+ * @app: struct holding the app to delete
+ *
+ * Will not delete the first application required by the FW
+ */
+static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *old_cfg, *new_cfg;
+ unsigned int i, j;
+ int ret = 0;
+
+ if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) {
+ netdev_err(netdev, "can't delete DSCP netlink app when FW DCB agent is active\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&pf->tc_mutex);
+ old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+ ret = dcb_ieee_delapp(netdev, app);
+ if (ret)
+ goto delapp_out;
+
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+ for (i = 0; i < new_cfg->numapps; i++) {
+ if (app->selector == new_cfg->app[i].selector &&
+ app->protocol == new_cfg->app[i].prot_id &&
+ app->priority == new_cfg->app[i].priority) {
+ new_cfg->app[i].selector = 0;
+ new_cfg->app[i].prot_id = 0;
+ new_cfg->app[i].priority = 0;
+ break;
+ }
+ }
+
+ /* Did not find DCB App */
+ if (i == new_cfg->numapps) {
+ ret = -EINVAL;
+ goto delapp_out;
+ }
+
+ new_cfg->numapps--;
+
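+ /* shift the remaining entries down to keep the APP table contiguous */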
+ for (j = i; j < new_cfg->numapps; j++) {
+ new_cfg->app[j].selector = old_cfg->app[j + 1].selector;
+ new_cfg->app[j].prot_id = old_cfg->app[j + 1].prot_id;
+ new_cfg->app[j].priority = old_cfg->app[j + 1].priority;
+ }
+
+ /* if not a DSCP APP TLV or DSCP is not supported, we are done */
+ if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP ||
+ !ice_is_feature_supported(pf, ICE_F_DSCP)) {
+ ret = ICE_DCB_HW_CHG;
+ goto delapp_out;
+ }
+
+ /* if DSCP TLV, then need to address change in mapping */
+ clear_bit(app->protocol, new_cfg->dscp_mapped);
+ /* remap this DSCP value to default value */
+ new_cfg->dscp_map[app->protocol] = app->protocol %
+ ICE_BYTES_PER_DSCP_VAL;
+
+ /* if the last DSCP mapping just got deleted, need to switch
+ * to L2 VLAN QoS mode
+ */
+ if (bitmap_empty(new_cfg->dscp_mapped, ICE_DSCP_NUM_VAL) &&
+ new_cfg->pfc_mode == ICE_QOS_MODE_DSCP) {
+ ret = ice_aq_set_pfc_mode(&pf->hw,
+ ICE_AQC_PFC_VLAN_BASED_PFC,
+ NULL);
+ if (ret) {
+ netdev_info(netdev, "Failed to set VLAN PFC mode %d\n",
+ ret);
+ goto delapp_out;
+ }
+ netdev_info(netdev, "Switched QoS to L2 VLAN mode\n");
+
+ new_cfg->pfc_mode = ICE_QOS_MODE_VLAN;
+
+ ret = ice_dcb_sw_dflt_cfg(pf, true, true);
+ } else {
+ ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+ }
+
+ /* return of ICE_DCB_HW_CHG_RST indicates new cfg applied
+ * and reset needs to be performed
+ */
+ if (ret == ICE_DCB_HW_CHG_RST)
+ ice_dcbnl_devreset(netdev);
+
+ /* if the change was not significant enough to actually call
+ * the reconfiguration flow, we still need to tell the caller that
+ * their request was successfully handled
+ */
+ if (ret == ICE_DCB_NO_HW_CHG)
+ ret = ICE_DCB_HW_CHG;
+
+delapp_out:
+ mutex_unlock(&pf->tc_mutex);
+ return ret;
+}
+
+/**
+ * ice_dcbnl_cee_set_all - Commit CEE DCB settings to HW
+ * @netdev: the corresponding netdev
+ */
+static u8 ice_dcbnl_cee_set_all(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+ int err;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return ICE_DCB_NO_HW_CHG;
+
+ new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+ mutex_lock(&pf->tc_mutex);
+
+ err = ice_pf_dcb_cfg(pf, new_cfg, true);
+
+ mutex_unlock(&pf->tc_mutex);
+ return (err != ICE_DCB_HW_CHG_RST) ? ICE_DCB_NO_HW_CHG : err;
+}
+
+static const struct dcbnl_rtnl_ops dcbnl_ops = {
+ /* IEEE 802.1Qaz std */
+ .ieee_getets = ice_dcbnl_getets,
+ .ieee_setets = ice_dcbnl_setets,
+ .ieee_getpfc = ice_dcbnl_getpfc,
+ .ieee_setpfc = ice_dcbnl_setpfc,
+ .ieee_setapp = ice_dcbnl_setapp,
+ .ieee_delapp = ice_dcbnl_delapp,
+
+ /* CEE std */
+ .getstate = ice_dcbnl_getstate,
+ .setstate = ice_dcbnl_setstate,
+ .getpermhwaddr = ice_dcbnl_get_perm_hw_addr,
+ .setpgtccfgtx = ice_dcbnl_set_pg_tc_cfg_tx,
+ .setpgbwgcfgtx = ice_dcbnl_set_pg_bwg_cfg_tx,
+ .setpgtccfgrx = ice_dcbnl_set_pg_tc_cfg_rx,
+ .setpgbwgcfgrx = ice_dcbnl_set_pg_bwg_cfg_rx,
+ .getpgtccfgtx = ice_dcbnl_get_pg_tc_cfg_tx,
+ .getpgbwgcfgtx = ice_dcbnl_get_pg_bwg_cfg_tx,
+ .getpgtccfgrx = ice_dcbnl_get_pg_tc_cfg_rx,
+ .getpgbwgcfgrx = ice_dcbnl_get_pg_bwg_cfg_rx,
+ .setpfccfg = ice_dcbnl_set_pfc_cfg,
+ .getpfccfg = ice_dcbnl_get_pfc_cfg,
+ .setall = ice_dcbnl_cee_set_all,
+ .getcap = ice_dcbnl_get_cap,
+ .getnumtcs = ice_dcbnl_getnumtcs,
+ .getpfcstate = ice_dcbnl_getpfcstate,
+ .getapp = ice_dcbnl_getapp,
+
+ /* DCBX configuration */
+ .getdcbx = ice_dcbnl_getdcbx,
+ .setdcbx = ice_dcbnl_setdcbx,
+};
+
+/**
+ * ice_dcbnl_set_all - set all the apps and ieee data from DCBX config
+ * @vsi: pointer to VSI struct
+ */
+void ice_dcbnl_set_all(struct ice_vsi *vsi)
+{
+ struct net_device *netdev = vsi->netdev;
+ struct ice_dcbx_cfg *dcbxcfg;
+ struct ice_port_info *pi;
+ struct dcb_app sapp;
+ struct ice_pf *pf;
+ unsigned int i;
+
+ if (!netdev)
+ return;
+
+ pf = ice_netdev_to_pf(netdev);
+ pi = pf->hw.port_info;
+
+ /* SW DCB taken care of by SW Default Config */
+ if (pf->dcbx_cap & DCB_CAP_DCBX_HOST)
+ return;
+
+ /* DCB not enabled */
+ if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+ return;
+
+ dcbxcfg = &pi->qos_cfg.local_dcbx_cfg;
+
+ for (i = 0; i < dcbxcfg->numapps; i++) {
+ u8 prio, tc_map;
+
+ prio = dcbxcfg->app[i].priority;
+ tc_map = BIT(dcbxcfg->etscfg.prio_table[prio]);
+
+ /* Add APP only if the TC is enabled for this VSI */
+ if (tc_map & vsi->tc_cfg.ena_tc) {
+ sapp.selector = dcbxcfg->app[i].selector;
+ sapp.protocol = dcbxcfg->app[i].prot_id;
+ sapp.priority = prio;
+ dcb_ieee_setapp(netdev, &sapp);
+ }
+ }
+ /* Notify user-space of the changes */
+ dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, 0, 0);
+}
+
+/**
+ * ice_dcbnl_vsi_del_app - Delete APP on all VSIs
+ * @vsi: pointer to the main VSI
+ * @app: APP to delete
+ *
+ * Delete given APP from all the VSIs for given PF
+ */
+static void
+ice_dcbnl_vsi_del_app(struct ice_vsi *vsi,
+ struct ice_dcb_app_priority_table *app)
+{
+ struct dcb_app sapp;
+ int err;
+
+ sapp.selector = app->selector;
+ sapp.protocol = app->prot_id;
+ sapp.priority = app->priority;
+ err = ice_dcbnl_delapp(vsi->netdev, &sapp);
+ dev_dbg(ice_pf_to_dev(vsi->back), "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n",
+ vsi->idx, err, app->selector, app->prot_id, app->priority);
+}
+
+/**
+ * ice_dcbnl_flush_apps - Delete all removed APPs
+ * @pf: the corresponding PF
+ * @old_cfg: old DCBX configuration data
+ * @new_cfg: new DCBX configuration data
+ *
+ * Find and delete all APPS that are not present in the passed
+ * DCB configuration
+ */
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+ struct ice_dcbx_cfg *new_cfg)
+{
+ struct ice_vsi *main_vsi = ice_get_main_vsi(pf);
+ unsigned int i;
+
+ if (!main_vsi)
+ return;
+
+ for (i = 0; i < old_cfg->numapps; i++) {
+ struct ice_dcb_app_priority_table app = old_cfg->app[i];
+
+ /* The APP is not present anymore; delete it */
+ if (!ice_dcbnl_find_app(new_cfg, &app))
+ ice_dcbnl_vsi_del_app(main_vsi, &app);
+ }
+}
+
+/**
+ * ice_dcbnl_setup - setup DCBNL
+ * @vsi: VSI to get associated netdev from
+ */
+void ice_dcbnl_setup(struct ice_vsi *vsi)
+{
+ struct net_device *netdev = vsi->netdev;
+ struct ice_pf *pf;
+
+ pf = ice_netdev_to_pf(netdev);
+ if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+ return;
+
+ netdev->dcbnl_ops = &dcbnl_ops;
+ ice_dcbnl_set_all(vsi);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
new file mode 100644
index 000000000..eac2f34bd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_NL_H_
+#define _ICE_DCB_NL_H_
+
+#ifdef CONFIG_DCB
+void ice_dcbnl_setup(struct ice_vsi *vsi);
+void ice_dcbnl_set_all(struct ice_vsi *vsi);
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+ struct ice_dcbx_cfg *new_cfg);
+#else
+static inline void ice_dcbnl_setup(struct ice_vsi *vsi) { }
+static inline void ice_dcbnl_set_all(struct ice_vsi *vsi) { }
+static inline void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+ struct ice_dcbx_cfg *new_cfg) { }
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_NL_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h
new file mode 100644
index 000000000..6d560d1c7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devids.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_DEVIDS_H_
+#define _ICE_DEVIDS_H_
+
+/* Device IDs */
+#define ICE_DEV_ID_E822_SI_DFLT 0x1888
+/* Intel(R) Ethernet Connection E823-L for backplane */
+#define ICE_DEV_ID_E823L_BACKPLANE 0x124C
+/* Intel(R) Ethernet Connection E823-L for SFP */
+#define ICE_DEV_ID_E823L_SFP 0x124D
+/* Intel(R) Ethernet Connection E823-L/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E823L_10G_BASE_T 0x124E
+/* Intel(R) Ethernet Connection E823-L 1GbE */
+#define ICE_DEV_ID_E823L_1GBE 0x124F
+/* Intel(R) Ethernet Connection E823-L for QSFP */
+#define ICE_DEV_ID_E823L_QSFP 0x151D
+/* Intel(R) Ethernet Controller E810-C for backplane */
+#define ICE_DEV_ID_E810C_BACKPLANE 0x1591
+/* Intel(R) Ethernet Controller E810-C for QSFP */
+#define ICE_DEV_ID_E810C_QSFP 0x1592
+/* Intel(R) Ethernet Controller E810-C for SFP */
+#define ICE_DEV_ID_E810C_SFP 0x1593
+#define ICE_SUBDEV_ID_E810T 0x000E
+#define ICE_SUBDEV_ID_E810T2 0x000F
+#define ICE_SUBDEV_ID_E810T3 0x0010
+#define ICE_SUBDEV_ID_E810T4 0x0011
+#define ICE_SUBDEV_ID_E810T5 0x0012
+#define ICE_SUBDEV_ID_E810T6 0x02E9
+#define ICE_SUBDEV_ID_E810T7 0x02EA
+/* Intel(R) Ethernet Controller E810-XXV for backplane */
+#define ICE_DEV_ID_E810_XXV_BACKPLANE 0x1599
+/* Intel(R) Ethernet Controller E810-XXV for QSFP */
+#define ICE_DEV_ID_E810_XXV_QSFP 0x159A
+/* Intel(R) Ethernet Controller E810-XXV for SFP */
+#define ICE_DEV_ID_E810_XXV_SFP 0x159B
+/* Intel(R) Ethernet Connection E823-C for backplane */
+#define ICE_DEV_ID_E823C_BACKPLANE 0x188A
+/* Intel(R) Ethernet Connection E823-C for QSFP */
+#define ICE_DEV_ID_E823C_QSFP 0x188B
+/* Intel(R) Ethernet Connection E823-C for SFP */
+#define ICE_DEV_ID_E823C_SFP 0x188C
+/* Intel(R) Ethernet Connection E823-C/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E823C_10G_BASE_T 0x188D
+/* Intel(R) Ethernet Connection E823-C 1GbE */
+#define ICE_DEV_ID_E823C_SGMII 0x188E
+/* Intel(R) Ethernet Connection E822-C for backplane */
+#define ICE_DEV_ID_E822C_BACKPLANE 0x1890
+/* Intel(R) Ethernet Connection E822-C for QSFP */
+#define ICE_DEV_ID_E822C_QSFP 0x1891
+/* Intel(R) Ethernet Connection E822-C for SFP */
+#define ICE_DEV_ID_E822C_SFP 0x1892
+/* Intel(R) Ethernet Connection E822-C/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E822C_10G_BASE_T 0x1893
+/* Intel(R) Ethernet Connection E822-C 1GbE */
+#define ICE_DEV_ID_E822C_SGMII 0x1894
+/* Intel(R) Ethernet Connection E822-L for backplane */
+#define ICE_DEV_ID_E822L_BACKPLANE 0x1897
+/* Intel(R) Ethernet Connection E822-L for SFP */
+#define ICE_DEV_ID_E822L_SFP 0x1898
+/* Intel(R) Ethernet Connection E822-L/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E822L_10G_BASE_T 0x1899
+/* Intel(R) Ethernet Connection E822-L 1GbE */
+#define ICE_DEV_ID_E822L_SGMII 0x189A
+
+#endif /* _ICE_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
new file mode 100644
index 000000000..e6ec20079
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -0,0 +1,1366 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Intel Corporation. */
+
+#include <linux/vmalloc.h>
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_devlink.h"
+#include "ice_eswitch.h"
+#include "ice_fw_update.h"
+
+static int ice_active_port_option = -1;
+
+/* context for devlink info version reporting */
+struct ice_info_ctx {
+ char buf[128];
+ struct ice_orom_info pending_orom;
+ struct ice_nvm_info pending_nvm;
+ struct ice_netlist_info pending_netlist;
+ struct ice_hw_dev_caps dev_caps;
+};
+
+/* The following functions are used to format specific strings for various
+ * devlink info versions. The ctx parameter is used to provide the storage
+ * buffer, as well as any ancillary information calculated when the info
+ * request was made.
+ *
+ * If a version does not exist, for example when attempting to get the
+ * inactive version of flash when there is no pending update, the function
+ * should leave the buffer in the ctx structure empty.
+ */
+
+static void ice_info_get_dsn(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ u8 dsn[8];
+
+ /* Copy the DSN into an array in Big Endian format */
+ put_unaligned_be64(pci_get_dsn(pf->pdev), dsn);
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn);
+}
+
+static void ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_hw *hw = &pf->hw;
+ int status;
+
+ status = ice_read_pba_string(hw, (u8 *)ctx->buf, sizeof(ctx->buf));
+ if (status)
+ /* We failed to locate the PBA, so just skip this entry */
+ dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %d\n",
+ status);
+}
+
+static void ice_info_fw_mgmt(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
+ hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch);
+}
+
+static void ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->api_maj_ver,
+ hw->api_min_ver, hw->api_patch);
+}
+
+static void ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", hw->fw_build);
+}
+
+static void ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_orom_info *orom = &pf->hw.flash.orom;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
+ orom->major, orom->build, orom->patch);
+}
+
+static void
+ice_info_pending_orom_ver(struct ice_pf __always_unused *pf,
+ struct ice_info_ctx *ctx)
+{
+ struct ice_orom_info *orom = &ctx->pending_orom;
+
+ if (ctx->dev_caps.common_cap.nvm_update_pending_orom)
+ snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
+ orom->major, orom->build, orom->patch);
+}
+
+static void ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_nvm_info *nvm = &pf->hw.flash.nvm;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor);
+}
+
+static void
+ice_info_pending_nvm_ver(struct ice_pf __always_unused *pf,
+ struct ice_info_ctx *ctx)
+{
+ struct ice_nvm_info *nvm = &ctx->pending_nvm;
+
+ if (ctx->dev_caps.common_cap.nvm_update_pending_nvm)
+ snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x",
+ nvm->major, nvm->minor);
+}
+
+static void ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_nvm_info *nvm = &pf->hw.flash.nvm;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack);
+}
+
+static void
+ice_info_pending_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_nvm_info *nvm = &ctx->pending_nvm;
+
+ if (ctx->dev_caps.common_cap.nvm_update_pending_nvm)
+ snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack);
+}
+
+static void ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%s", hw->active_pkg_name);
+}
+
+static void
+ice_info_ddp_pkg_version(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u.%u",
+ pkg->major, pkg->minor, pkg->update, pkg->draft);
+}
+
+static void
+ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", pf->hw.active_track_id);
+}
+
+static void ice_info_netlist_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_netlist_info *netlist = &pf->hw.flash.netlist;
+
+ /* The netlist version fields are BCD formatted */
+ snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x",
+ netlist->major, netlist->minor,
+ netlist->type >> 16, netlist->type & 0xFFFF,
+ netlist->rev, netlist->cust_ver);
+}
+
+static void ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+ struct ice_netlist_info *netlist = &pf->hw.flash.netlist;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash);
+}
+
+static void
+ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf,
+ struct ice_info_ctx *ctx)
+{
+ struct ice_netlist_info *netlist = &ctx->pending_netlist;
+
+ /* The netlist version fields are BCD formatted */
+ if (ctx->dev_caps.common_cap.nvm_update_pending_netlist)
+ snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x",
+ netlist->major, netlist->minor,
+ netlist->type >> 16, netlist->type & 0xFFFF,
+ netlist->rev, netlist->cust_ver);
+}
+
+static void
+ice_info_pending_netlist_build(struct ice_pf __always_unused *pf,
+ struct ice_info_ctx *ctx)
+{
+ struct ice_netlist_info *netlist = &ctx->pending_netlist;
+
+ if (ctx->dev_caps.common_cap.nvm_update_pending_netlist)
+ snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash);
+}
+
+#define fixed(key, getter) { ICE_VERSION_FIXED, key, getter, NULL }
+#define running(key, getter) { ICE_VERSION_RUNNING, key, getter, NULL }
+#define stored(key, getter, fallback) { ICE_VERSION_STORED, key, getter, fallback }
+
+/* The combined() macro inserts both the running entry as well as a stored
+ * entry. The running entry will always report the version from the active
+ * handler. The stored entry will first try the pending handler, and fallback
+ * to the active handler if the pending function does not report a version.
+ * The pending handler should check the status of a pending update for the
+ * relevant flash component. It should only fill in the buffer in the case
+ * where a valid pending version is available. This ensures that the related
+ * stored and running versions remain in sync, and that stored versions are
+ * correctly reported as expected.
+ */
+#define combined(key, active, pending) \
+ running(key, active), \
+ stored(key, pending, active)
+
+enum ice_version_type {
+ ICE_VERSION_FIXED,
+ ICE_VERSION_RUNNING,
+ ICE_VERSION_STORED,
+};
+
+static const struct ice_devlink_version {
+ enum ice_version_type type;
+ const char *key;
+ void (*getter)(struct ice_pf *pf, struct ice_info_ctx *ctx);
+ void (*fallback)(struct ice_pf *pf, struct ice_info_ctx *ctx);
+} ice_devlink_versions[] = {
+ fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba),
+ running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt),
+ running("fw.mgmt.api", ice_info_fw_api),
+ running("fw.mgmt.build", ice_info_fw_build),
+ combined(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver, ice_info_pending_orom_ver),
+ combined("fw.psid.api", ice_info_nvm_ver, ice_info_pending_nvm_ver),
+ combined(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack, ice_info_pending_eetrack),
+ running("fw.app.name", ice_info_ddp_pkg_name),
+ running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version),
+ running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id),
+ combined("fw.netlist", ice_info_netlist_ver, ice_info_pending_netlist_ver),
+ combined("fw.netlist.build", ice_info_netlist_build, ice_info_pending_netlist_build),
+};
+
+/**
+ * ice_devlink_info_get - .info_get devlink handler
+ * @devlink: devlink instance structure
+ * @req: the devlink info request
+ * @extack: extended netdev ack structure
+ *
+ * Callback for the devlink .info_get operation. Reports information about the
+ * device.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int ice_devlink_info_get(struct devlink *devlink,
+ struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ struct ice_info_ctx *ctx;
+ size_t i;
+ int err;
+
+ err = ice_wait_for_reset(pf, 10 * HZ);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Device is busy resetting");
+ return err;
+ }
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ /* discover capabilities first */
+ err = ice_discover_dev_caps(hw, &ctx->dev_caps);
+ if (err) {
+ dev_dbg(dev, "Failed to discover device capabilities, status %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ NL_SET_ERR_MSG_MOD(extack, "Unable to discover device capabilities");
+ goto out_free_ctx;
+ }
+
+ if (ctx->dev_caps.common_cap.nvm_update_pending_orom) {
+ err = ice_get_inactive_orom_ver(hw, &ctx->pending_orom);
+ if (err) {
+ dev_dbg(dev, "Unable to read inactive Option ROM version data, status %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+
+ /* disable display of pending Option ROM */
+ ctx->dev_caps.common_cap.nvm_update_pending_orom = false;
+ }
+ }
+
+ if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) {
+ err = ice_get_inactive_nvm_ver(hw, &ctx->pending_nvm);
+ if (err) {
+ dev_dbg(dev, "Unable to read inactive NVM version data, status %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+
+ /* disable display of pending NVM version */
+ ctx->dev_caps.common_cap.nvm_update_pending_nvm = false;
+ }
+ }
+
+ if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) {
+ err = ice_get_inactive_netlist_ver(hw, &ctx->pending_netlist);
+ if (err) {
+ dev_dbg(dev, "Unable to read inactive Netlist version data, status %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+
+ /* disable display of pending netlist version */
+ ctx->dev_caps.common_cap.nvm_update_pending_netlist = false;
+ }
+ }
+
+ err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name");
+ goto out_free_ctx;
+ }
+
+ ice_info_get_dsn(pf, ctx);
+
+ err = devlink_info_serial_number_put(req, ctx->buf);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to set serial number");
+ goto out_free_ctx;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ice_devlink_versions); i++) {
+ enum ice_version_type type = ice_devlink_versions[i].type;
+ const char *key = ice_devlink_versions[i].key;
+
+ memset(ctx->buf, 0, sizeof(ctx->buf));
+
+ ice_devlink_versions[i].getter(pf, ctx);
+
+ /* If the default getter doesn't report a version, use the
+ * fallback function. This is primarily useful in the case of
+ * "stored" versions that want to report the same value as the
+ * running version in the normal case of no pending update.
+ */
+ if (ctx->buf[0] == '\0' && ice_devlink_versions[i].fallback)
+ ice_devlink_versions[i].fallback(pf, ctx);
+
+ /* Do not report missing versions */
+ if (ctx->buf[0] == '\0')
+ continue;
+
+ switch (type) {
+ case ICE_VERSION_FIXED:
+ err = devlink_info_version_fixed_put(req, key, ctx->buf);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to set fixed version");
+ goto out_free_ctx;
+ }
+ break;
+ case ICE_VERSION_RUNNING:
+ err = devlink_info_version_running_put(req, key, ctx->buf);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to set running version");
+ goto out_free_ctx;
+ }
+ break;
+ case ICE_VERSION_STORED:
+ err = devlink_info_version_stored_put(req, key, ctx->buf);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to set stored version");
+ goto out_free_ctx;
+ }
+ break;
+ }
+ }
+
+out_free_ctx:
+ kfree(ctx);
+ return err;
+}
+
+/**
+ * ice_devlink_reload_empr_start - Start EMP reset to activate new firmware
+ * @devlink: pointer to the devlink instance to reload
+ * @netns_change: if true, the network namespace is changing
+ * @action: the action to perform. Must be DEVLINK_RELOAD_ACTION_FW_ACTIVATE
+ * @limit: limits on what reload should do, such as not resetting
+ * @extack: netlink extended ACK structure
+ *
+ * Allow user to activate new Embedded Management Processor firmware by
+ * issuing device specific EMP reset. Called in response to
+ * a DEVLINK_CMD_RELOAD with the DEVLINK_RELOAD_ACTION_FW_ACTIVATE.
+ *
+ * Note that teardown and rebuild of the driver state happens automatically as
+ * part of an interrupt and watchdog task. This is because all physical
+ * functions on the device must be able to reset when an EMP reset occurs from
+ * any source.
+ */
+static int
+ice_devlink_reload_empr_start(struct devlink *devlink, bool netns_change,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ u8 pending;
+ int err;
+
+ err = ice_get_pending_updates(pf, &pending, extack);
+ if (err)
+ return err;
+
+ /* pending is a bitmask of which flash banks have a pending update,
+ * including the main NVM bank, the Option ROM bank, and the netlist
+ * bank. If any of these bits are set, then there is a pending update
+ * waiting to be activated.
+ */
+ if (!pending) {
+ NL_SET_ERR_MSG_MOD(extack, "No pending firmware update");
+ return -ECANCELED;
+ }
+
+ if (pf->fw_emp_reset_disabled) {
+ NL_SET_ERR_MSG_MOD(extack, "EMP reset is not available. To activate firmware, a reboot or power cycle is needed");
+ return -ECANCELED;
+ }
+
+ dev_dbg(dev, "Issuing device EMP reset to activate firmware\n");
+
+ err = ice_aq_nvm_update_empr(hw);
+ if (err) {
+ dev_err(dev, "Failed to trigger EMP device reset to reload firmware, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ NL_SET_ERR_MSG_MOD(extack, "Failed to trigger EMP device reset to reload firmware");
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_devlink_reload_empr_finish - Wait for EMP reset to finish
+ * @devlink: pointer to the devlink instance reloading
+ * @action: the action requested
+ * @limit: limits imposed by userspace, such as not resetting
+ * @actions_performed: on return, indicates which actions were actually performed
+ * @extack: netlink extended ACK structure
+ *
+ * Wait for driver to finish rebuilding after EMP reset is completed. This
+ * includes time to wait for both the actual device reset as well as the time
+ * for the driver's rebuild to complete.
+ */
+static int
+ice_devlink_reload_empr_finish(struct devlink *devlink,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ u32 *actions_performed,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+ int err;
+
+ *actions_performed = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE);
+
+ err = ice_wait_for_reset(pf, 60 * HZ);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Device still resetting after 1 minute");
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_devlink_port_opt_speed_str - convert speed to a string
+ * @speed: speed value
+ */
+static const char *ice_devlink_port_opt_speed_str(u8 speed)
+{
+ switch (speed & ICE_AQC_PORT_OPT_MAX_LANE_M) {
+ case ICE_AQC_PORT_OPT_MAX_LANE_100M:
+ return "0.1";
+ case ICE_AQC_PORT_OPT_MAX_LANE_1G:
+ return "1";
+ case ICE_AQC_PORT_OPT_MAX_LANE_2500M:
+ return "2.5";
+ case ICE_AQC_PORT_OPT_MAX_LANE_5G:
+ return "5";
+ case ICE_AQC_PORT_OPT_MAX_LANE_10G:
+ return "10";
+ case ICE_AQC_PORT_OPT_MAX_LANE_25G:
+ return "25";
+ case ICE_AQC_PORT_OPT_MAX_LANE_50G:
+ return "50";
+ case ICE_AQC_PORT_OPT_MAX_LANE_100G:
+ return "100";
+ }
+
+ return "-";
+}
+
+#define ICE_PORT_OPT_DESC_LEN 50
+/**
+ * ice_devlink_port_options_print - Print available port split options
+ * @pf: the PF to print split port options
+ *
+ * Prints a table with available port split options and max port speeds
+ */
+static void ice_devlink_port_options_print(struct ice_pf *pf)
+{
+ u8 i, j, options_count, cnt, speed, pending_idx, active_idx;
+ struct ice_aqc_get_port_options_elem *options, *opt;
+ struct device *dev = ice_pf_to_dev(pf);
+ bool active_valid, pending_valid;
+ char desc[ICE_PORT_OPT_DESC_LEN];
+ const char *str;
+ int status;
+
+ options = kcalloc(ICE_AQC_PORT_OPT_MAX * ICE_MAX_PORT_PER_PCI_DEV,
+ sizeof(*options), GFP_KERNEL);
+ if (!options)
+ return;
+
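+ /* read the option table for each port; per-lane speeds are printed below */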
+ for (i = 0; i < ICE_MAX_PORT_PER_PCI_DEV; i++) {
+ opt = options + i * ICE_AQC_PORT_OPT_MAX;
+ options_count = ICE_AQC_PORT_OPT_MAX;
+ active_valid = 0;
+
+ status = ice_aq_get_port_options(&pf->hw, opt, &options_count,
+ i, true, &active_idx,
+ &active_valid, &pending_idx,
+ &pending_valid);
+ if (status) {
+ dev_dbg(dev, "Couldn't read port option for port %d, err %d\n",
+ i, status);
+ goto err;
+ }
+ }
+
+ dev_dbg(dev, "Available port split options and max port speeds (Gbps):\n");
+ dev_dbg(dev, "Status Split Quad 0 Quad 1\n");
+ dev_dbg(dev, " count L0 L1 L2 L3 L4 L5 L6 L7\n");
+
+ for (i = 0; i < options_count; i++) {
+ cnt = 0;
+
+ if (i == ice_active_port_option)
+ str = "Active";
+ else if ((i == pending_idx) && pending_valid)
+ str = "Pending";
+ else
+ str = "";
+
+ cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt,
+ "%-8s", str);
+
+ cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt,
+ "%-6u", options[i].pmd);
+
+ for (j = 0; j < ICE_MAX_PORT_PER_PCI_DEV; ++j) {
+ speed = options[i + j * ICE_AQC_PORT_OPT_MAX].max_lane_speed;
+ str = ice_devlink_port_opt_speed_str(speed);
+ cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt,
+ "%3s ", str);
+ }
+
+ dev_dbg(dev, "%s\n", desc);
+ }
+
+err:
+ kfree(options);
+}
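+
+/* An illustrative (hypothetical) row of the table printed above:
+ *   Active  4     25  25  25  25   -   -   -   -
+ * i.e. the active option splits the device into four ports with a max
+ * speed of 25 Gbps each on quad 0.
+ */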
+
+/**
+ * ice_devlink_aq_set_port_option - Send set port option admin queue command
+ * @pf: the PF to set the port option for
+ * @option_idx: selected port option
+ * @extack: extended netdev ack structure
+ *
+ * Sends set port option admin queue command with selected port option and
+ * calls NVM write activate.
+ */
+static int
+ice_devlink_aq_set_port_option(struct ice_pf *pf, u8 option_idx,
+ struct netlink_ext_ack *extack)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ int status;
+
+ status = ice_aq_set_port_option(&pf->hw, 0, true, option_idx);
+ if (status) {
+ dev_dbg(dev, "ice_aq_set_port_option, err %d aq_err %d\n",
+ status, pf->hw.adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Port split request failed");
+ return -EIO;
+ }
+
+ status = ice_acquire_nvm(&pf->hw, ICE_RES_WRITE);
+ if (status) {
+ dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
+ status, pf->hw.adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
+ return -EIO;
+ }
+
+ status = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR, NULL);
+ if (status) {
+ dev_dbg(dev, "ice_nvm_write_activate failed, err %d aq_err %d\n",
+ status, pf->hw.adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Port split request failed to save data");
+ ice_release_nvm(&pf->hw);
+ return -EIO;
+ }
+
+ ice_release_nvm(&pf->hw);
+
+ NL_SET_ERR_MSG_MOD(extack, "Reboot required to finish port split");
+ return 0;
+}
+
+/**
+ * ice_devlink_port_split - .port_split devlink handler
+ * @devlink: devlink instance structure
+ * @port: devlink port structure
+ * @count: number of ports to split to
+ * @extack: extended netdev ack structure
+ *
+ * Callback for the devlink .port_split operation.
+ *
+ * Unfortunately, the devlink expression of available options is limited
+ * to just a number, so search for an FW port option which supports
+ * the specified number. As there could be multiple FW port options with
+ * the same port split count, allow switching between them. When the same
+ * port split count request is issued again, switch to the next FW port
+ * option with the same port split count.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int
+ice_devlink_port_split(struct devlink *devlink, struct devlink_port *port,
+ unsigned int count, struct netlink_ext_ack *extack)
+{
+ struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX];
+ u8 i, j, active_idx, pending_idx, new_option;
+ struct ice_pf *pf = devlink_priv(devlink);
+ u8 option_count = ICE_AQC_PORT_OPT_MAX;
+ struct device *dev = ice_pf_to_dev(pf);
+ bool active_valid, pending_valid;
+ int status;
+
+ status = ice_aq_get_port_options(&pf->hw, options, &option_count,
+ 0, true, &active_idx, &active_valid,
+ &pending_idx, &pending_valid);
+ if (status) {
+ dev_dbg(dev, "Couldn't read port split options, err = %d\n",
+ status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to get available port split options");
+ return -EIO;
+ }
+
+ new_option = ICE_AQC_PORT_OPT_MAX;
+ active_idx = pending_valid ? pending_idx : active_idx;
+ for (i = 1; i <= option_count; i++) {
+ /* In order to allow switching between FW port options with
+ * the same port split count, search for a new option starting
+ * from the active/pending option (with array wrap around).
+ */
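+ /* Example: with option_count == 3 and active_idx == 1, the
+ * probe order is j = 2, 0, 1, so the currently-active option
+ * is considered last.
+ */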
+ j = (active_idx + i) % option_count;
+
+ if (count == options[j].pmd) {
+ new_option = j;
+ break;
+ }
+ }
+
+ if (new_option == active_idx) {
+ dev_dbg(dev, "request to split: count: %u is already set and there are no other options\n",
+ count);
+ NL_SET_ERR_MSG_MOD(extack, "Requested split count is already set");
+ ice_devlink_port_options_print(pf);
+ return -EINVAL;
+ }
+
+ if (new_option == ICE_AQC_PORT_OPT_MAX) {
+ dev_dbg(dev, "request to split: count: %u not found\n", count);
+ NL_SET_ERR_MSG_MOD(extack, "Port split requested unsupported port config");
+ ice_devlink_port_options_print(pf);
+ return -EINVAL;
+ }
+
+ status = ice_devlink_aq_set_port_option(pf, new_option, extack);
+ if (status)
+ return status;
+
+ ice_devlink_port_options_print(pf);
+
+ return 0;
+}
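+
+/* Example invocation (hypothetical PCI address and port):
+ *   devlink port split pci/0000:01:00.0/0 count 4
+ * Repeating the same count cycles through FW options with that split
+ * count, as described above.
+ */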
+
+/**
+ * ice_devlink_port_unsplit - .port_unsplit devlink handler
+ * @devlink: devlink instance structure
+ * @port: devlink port structure
+ * @extack: extended netdev ack structure
+ *
+ * Callback for the devlink .port_unsplit operation.
+ * Calls ice_devlink_port_split with split count set to 1.
+ * Note that there may be no FW option available with split count 1; in
+ * that case the unsplit request fails.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int
+ice_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port,
+ struct netlink_ext_ack *extack)
+{
+ return ice_devlink_port_split(devlink, port, 1, extack);
+}
+
+static const struct devlink_ops ice_devlink_ops = {
+ .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
+ .reload_actions = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
+ /* The ice driver currently does not support driver reinit */
+ .reload_down = ice_devlink_reload_empr_start,
+ .reload_up = ice_devlink_reload_empr_finish,
+ .port_split = ice_devlink_port_split,
+ .port_unsplit = ice_devlink_port_unsplit,
+ .eswitch_mode_get = ice_eswitch_mode_get,
+ .eswitch_mode_set = ice_eswitch_mode_set,
+ .info_get = ice_devlink_info_get,
+ .flash_update = ice_devlink_flash_update,
+};
+
+static int
+ice_devlink_enable_roce_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+
+ ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? true : false;
+
+ return 0;
+}
+
+static int
+ice_devlink_enable_roce_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+ bool roce_ena = ctx->val.vbool;
+ int ret;
+
+ if (!roce_ena) {
+ ice_unplug_aux_dev(pf);
+ pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2;
+ return 0;
+ }
+
+ pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2;
+ ret = ice_plug_aux_dev(pf);
+ if (ret)
+ pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2;
+
+ return ret;
+}
+
+static int
+ice_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+
+ if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
+ return -EOPNOTSUPP;
+
+ if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP) {
+ NL_SET_ERR_MSG_MOD(extack, "iWARP is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+ice_devlink_enable_iw_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+
+ ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP;
+
+ return 0;
+}
+
+static int
+ice_devlink_enable_iw_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+ bool iw_ena = ctx->val.vbool;
+ int ret;
+
+ if (!iw_ena) {
+ ice_unplug_aux_dev(pf);
+ pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP;
+ return 0;
+ }
+
+ pf->rdma_mode |= IIDC_RDMA_PROTOCOL_IWARP;
+ ret = ice_plug_aux_dev(pf);
+ if (ret)
+ pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP;
+
+ return ret;
+}
+
+static int
+ice_devlink_enable_iw_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+
+ if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
+ return -EOPNOTSUPP;
+
+ if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2) {
+ NL_SET_ERR_MSG_MOD(extack, "RoCEv2 is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static const struct devlink_param ice_devlink_params[] = {
+ DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ ice_devlink_enable_roce_get,
+ ice_devlink_enable_roce_set,
+ ice_devlink_enable_roce_validate),
+ DEVLINK_PARAM_GENERIC(ENABLE_IWARP, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ ice_devlink_enable_iw_get,
+ ice_devlink_enable_iw_set,
+ ice_devlink_enable_iw_validate),
+};
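+
+/* The RoCEv2/iWARP toggles are exposed as generic devlink runtime
+ * parameters, e.g. (hypothetical PCI address):
+ *   devlink dev param set pci/0000:01:00.0 name enable_roce \
+ *           value true cmode runtime
+ * The validate callbacks enforce that the two protocols are mutually
+ * exclusive.
+ */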
+
+static void ice_devlink_free(void *devlink_ptr)
+{
+ devlink_free((struct devlink *)devlink_ptr);
+}
+
+/**
+ * ice_allocate_pf - Allocate devlink and return PF structure pointer
+ * @dev: the device to allocate for
+ *
+ * Allocate a devlink instance for this device and return the private area as
+ * the PF structure. The devlink memory is tracked through devres by adding
+ * an action that frees it when unwinding.
+ */
+struct ice_pf *ice_allocate_pf(struct device *dev)
+{
+ struct devlink *devlink;
+
+ devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev);
+ if (!devlink)
+ return NULL;
+
+ /* Add an action to teardown the devlink when unwinding the driver */
+ if (devm_add_action_or_reset(dev, ice_devlink_free, devlink))
+ return NULL;
+
+ return devlink_priv(devlink);
+}
+
+/**
+ * ice_devlink_register - Register devlink interface for this PF
+ * @pf: the PF to register the devlink for.
+ *
+ * Register the devlink instance associated with this physical function.
+ */
+void ice_devlink_register(struct ice_pf *pf)
+{
+ struct devlink *devlink = priv_to_devlink(pf);
+
+ devlink_set_features(devlink, DEVLINK_F_RELOAD);
+ devlink_register(devlink);
+}
+
+/**
+ * ice_devlink_unregister - Unregister devlink resources for this PF.
+ * @pf: the PF structure to cleanup
+ *
+ * Releases resources used by devlink and cleans up associated memory.
+ */
+void ice_devlink_unregister(struct ice_pf *pf)
+{
+ devlink_unregister(priv_to_devlink(pf));
+}
+
+/**
+ * ice_devlink_set_switch_id - Set unique switch id based on pci dsn
+ * @pf: the PF to create a devlink port for
+ * @ppid: struct with switch id information
+ */
+static void
+ice_devlink_set_switch_id(struct ice_pf *pf, struct netdev_phys_item_id *ppid)
+{
+ struct pci_dev *pdev = pf->pdev;
+ u64 id;
+
+ id = pci_get_dsn(pdev);
+
+ ppid->id_len = sizeof(id);
+ put_unaligned_be64(id, &ppid->id);
+}
+
+int ice_devlink_register_params(struct ice_pf *pf)
+{
+ struct devlink *devlink = priv_to_devlink(pf);
+ union devlink_param_value value;
+ int err;
+
+ err = devlink_params_register(devlink, ice_devlink_params,
+ ARRAY_SIZE(ice_devlink_params));
+ if (err)
+ return err;
+
+ value.vbool = false;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
+ value);
+
+ value.vbool = test_bit(ICE_FLAG_RDMA_ENA, pf->flags) ? true : false;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+ value);
+
+ return 0;
+}
+
+void ice_devlink_unregister_params(struct ice_pf *pf)
+{
+ devlink_params_unregister(priv_to_devlink(pf), ice_devlink_params,
+ ARRAY_SIZE(ice_devlink_params));
+}
+
+/**
+ * ice_devlink_set_port_split_options - Set port split options
+ * @pf: the PF to set port split options
+ * @attrs: devlink attributes
+ *
+ * Sets devlink port split options based on available FW port options
+ */
+static void
+ice_devlink_set_port_split_options(struct ice_pf *pf,
+ struct devlink_port_attrs *attrs)
+{
+ struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX];
+ u8 i, active_idx, pending_idx, option_count = ICE_AQC_PORT_OPT_MAX;
+ bool active_valid, pending_valid;
+ int status;
+
+ status = ice_aq_get_port_options(&pf->hw, options, &option_count,
+ 0, true, &active_idx, &active_valid,
+ &pending_idx, &pending_valid);
+ if (status) {
+ dev_dbg(ice_pf_to_dev(pf), "Couldn't read port split options, err = %d\n",
+ status);
+ return;
+ }
+
+ /* find the biggest available port split count */
+ for (i = 0; i < option_count; i++)
+ attrs->lanes = max_t(int, attrs->lanes, options[i].pmd);
+
+ attrs->splittable = attrs->lanes ? 1 : 0;
+ ice_active_port_option = active_idx;
+}
+
+/**
+ * ice_devlink_create_pf_port - Create a devlink port for this PF
+ * @pf: the PF to create a devlink port for
+ *
+ * Create and register a devlink_port for this PF.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+int ice_devlink_create_pf_port(struct ice_pf *pf)
+{
+ struct devlink_port_attrs attrs = {};
+ struct devlink_port *devlink_port;
+ struct devlink *devlink;
+ struct ice_vsi *vsi;
+ struct device *dev;
+ int err;
+
+ dev = ice_pf_to_dev(pf);
+
+ devlink_port = &pf->devlink_port;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return -EIO;
+
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ attrs.phys.port_number = pf->hw.bus.func;
+
+ /* As FW supports port split options only for the whole device,
+ * set them only for the first PF.
+ */
+ if (pf->hw.pf_id == 0)
+ ice_devlink_set_port_split_options(pf, &attrs);
+
+ ice_devlink_set_switch_id(pf, &attrs.switch_id);
+
+ devlink_port_attrs_set(devlink_port, &attrs);
+ devlink = priv_to_devlink(pf);
+
+ err = devlink_port_register(devlink, devlink_port, vsi->idx);
+ if (err) {
+ dev_err(dev, "Failed to create devlink port for PF %d, error %d\n",
+ pf->hw.pf_id, err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_devlink_destroy_pf_port - Destroy the devlink_port for this PF
+ * @pf: the PF to cleanup
+ *
+ * Unregisters the devlink_port structure associated with this PF.
+ */
+void ice_devlink_destroy_pf_port(struct ice_pf *pf)
+{
+ struct devlink_port *devlink_port;
+
+ devlink_port = &pf->devlink_port;
+
+ devlink_port_type_clear(devlink_port);
+ devlink_port_unregister(devlink_port);
+}
+
+/**
+ * ice_devlink_create_vf_port - Create a devlink port for this VF
+ * @vf: the VF to create a port for
+ *
+ * Create and register a devlink_port for this VF.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+int ice_devlink_create_vf_port(struct ice_vf *vf)
+{
+ struct devlink_port_attrs attrs = {};
+ struct devlink_port *devlink_port;
+ struct devlink *devlink;
+ struct ice_vsi *vsi;
+ struct device *dev;
+ struct ice_pf *pf;
+ int err;
+
+ pf = vf->pf;
+ dev = ice_pf_to_dev(pf);
+ devlink_port = &vf->devlink_port;
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi)
+ return -EINVAL;
+
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF;
+ attrs.pci_vf.pf = pf->hw.bus.func;
+ attrs.pci_vf.vf = vf->vf_id;
+
+ ice_devlink_set_switch_id(pf, &attrs.switch_id);
+
+ devlink_port_attrs_set(devlink_port, &attrs);
+ devlink = priv_to_devlink(pf);
+
+ err = devlink_port_register(devlink, devlink_port, vsi->idx);
+ if (err) {
+ dev_err(dev, "Failed to create devlink port for VF %d, error %d\n",
+ vf->vf_id, err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_devlink_destroy_vf_port - Destroy the devlink_port for this VF
+ * @vf: the VF to cleanup
+ *
+ * Unregisters the devlink_port structure associated with this VF.
+ */
+void ice_devlink_destroy_vf_port(struct ice_vf *vf)
+{
+ struct devlink_port *devlink_port;
+
+ devlink_port = &vf->devlink_port;
+
+ devlink_port_type_clear(devlink_port);
+ devlink_port_unregister(devlink_port);
+}
+
+#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024)
+
+/**
+ * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents
+ * @devlink: the devlink instance
+ * @ops: the devlink region being snapshotted
+ * @extack: extended ACK response structure
+ * @data: on exit points to snapshot data buffer
+ *
+ * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for
+ * the nvm-flash devlink region. It captures a snapshot of the full NVM flash
+ * contents, including both banks of flash. This snapshot can later be viewed
+ * via the devlink-region interface.
+ *
+ * The read is performed with the FLASH_ONLY bit set, so it does not reflect
+ * the current Shadow RAM contents. For those, use the shadow-ram region.
+ *
+ * Returns: zero on success, and updates the data pointer. Returns a non-zero
+ * error code on failure.
+ */
+static int ice_devlink_nvm_snapshot(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ struct netlink_ext_ack *extack, u8 **data)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ u8 *nvm_data, *tmp, i;
+ u32 nvm_size, left;
+ s8 num_blks;
+ int status;
+
+ nvm_size = hw->flash.flash_size;
+ nvm_data = vzalloc(nvm_size);
+ if (!nvm_data)
+ return -ENOMEM;
+
+ num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE);
+ tmp = nvm_data;
+ left = nvm_size;
+
+ /* Some systems take longer to read the NVM than others, which can cause
+ * FW to reclaim the NVM lock before the entire NVM has been read. Avoid
+ * this by breaking the NVM reads into smaller chunks that each complete
+ * quickly. This adds some overhead, since it increases the number of AQ
+ * commands, but it should work on all systems.
+ */
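+ /* Example: an 8 MB flash is read as eight 1 MB chunks, with the NVM
+ * semaphore acquired and released around each chunk so FW is never
+ * held off for the duration of the full read.
+ */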
+ for (i = 0; i < num_blks; i++) {
+ u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left);
+
+ status = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (status) {
+ dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
+ vfree(nvm_data);
+ return -EIO;
+ }
+
+ status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE,
+ &read_sz, tmp, false);
+ if (status) {
+ dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
+ read_sz, status, hw->adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
+ ice_release_nvm(hw);
+ vfree(nvm_data);
+ return -EIO;
+ }
+ ice_release_nvm(hw);
+
+ tmp += read_sz;
+ left -= read_sz;
+ }
+
+ *data = nvm_data;
+
+ return 0;
+}
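+
+/* Example userspace flow (hypothetical PCI address):
+ *   devlink region new pci/0000:01:00.0/nvm-flash
+ *   devlink region dump pci/0000:01:00.0/nvm-flash snapshot 0
+ * The first command triggers this snapshot callback; the second reads
+ * back the captured data.
+ */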
+
+/**
+ * ice_devlink_sram_snapshot - Capture a snapshot of the Shadow RAM contents
+ * @devlink: the devlink instance
+ * @ops: the devlink region being snapshotted
+ * @extack: extended ACK response structure
+ * @data: on exit points to snapshot data buffer
+ *
+ * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for
+ * the shadow-ram devlink region. It captures a snapshot of the Shadow RAM
+ * contents. This snapshot can later be viewed via the devlink-region
+ * interface.
+ *
+ * Returns: zero on success, and updates the data pointer. Returns a non-zero
+ * error code on failure.
+ */
+static int
+ice_devlink_sram_snapshot(struct devlink *devlink,
+ const struct devlink_region_ops __always_unused *ops,
+ struct netlink_ext_ack *extack, u8 **data)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ u8 *sram_data;
+ u32 sram_size;
+ int err;
+
+ sram_size = hw->flash.sr_words * 2u;
+ sram_data = vzalloc(sram_size);
+ if (!sram_data)
+ return -ENOMEM;
+
+ err = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (err) {
+ dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
+ err, hw->adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
+ vfree(sram_data);
+ return err;
+ }
+
+ /* Read from the Shadow RAM, rather than directly from NVM */
+ err = ice_read_flat_nvm(hw, 0, &sram_size, sram_data, true);
+ if (err) {
+ dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
+ sram_size, err, hw->adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to read Shadow RAM contents");
+ ice_release_nvm(hw);
+ vfree(sram_data);
+ return err;
+ }
+
+ ice_release_nvm(hw);
+
+ *data = sram_data;
+
+ return 0;
+}
+
+/**
+ * ice_devlink_devcaps_snapshot - Capture snapshot of device capabilities
+ * @devlink: the devlink instance
+ * @ops: the devlink region being snapshotted
+ * @extack: extended ACK response structure
+ * @data: on exit points to snapshot data buffer
+ *
+ * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for
+ * the device-caps devlink region. It captures a snapshot of the device
+ * capabilities reported by firmware.
+ *
+ * Returns: zero on success, and updates the data pointer. Returns a non-zero
+ * error code on failure.
+ */
+static int
+ice_devlink_devcaps_snapshot(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ struct netlink_ext_ack *extack, u8 **data)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ void *devcaps;
+ int status;
+
+ devcaps = vzalloc(ICE_AQ_MAX_BUF_LEN);
+ if (!devcaps)
+ return -ENOMEM;
+
+ status = ice_aq_list_caps(hw, devcaps, ICE_AQ_MAX_BUF_LEN, NULL,
+ ice_aqc_opc_list_dev_caps, NULL);
+ if (status) {
+ dev_dbg(dev, "ice_aq_list_caps: failed to read device capabilities, err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to read device capabilities");
+ vfree(devcaps);
+ return status;
+ }
+
+ *data = (u8 *)devcaps;
+
+ return 0;
+}
+
+static const struct devlink_region_ops ice_nvm_region_ops = {
+ .name = "nvm-flash",
+ .destructor = vfree,
+ .snapshot = ice_devlink_nvm_snapshot,
+};
+
+static const struct devlink_region_ops ice_sram_region_ops = {
+ .name = "shadow-ram",
+ .destructor = vfree,
+ .snapshot = ice_devlink_sram_snapshot,
+};
+
+static const struct devlink_region_ops ice_devcaps_region_ops = {
+ .name = "device-caps",
+ .destructor = vfree,
+ .snapshot = ice_devlink_devcaps_snapshot,
+};
+
+/**
+ * ice_devlink_init_regions - Initialize devlink regions
+ * @pf: the PF device structure
+ *
+ * Create devlink regions used to enable access to dump the contents of the
+ * flash memory on the device.
+ */
+void ice_devlink_init_regions(struct ice_pf *pf)
+{
+ struct devlink *devlink = priv_to_devlink(pf);
+ struct device *dev = ice_pf_to_dev(pf);
+ u64 nvm_size, sram_size;
+
+ nvm_size = pf->hw.flash.flash_size;
+ pf->nvm_region = devlink_region_create(devlink, &ice_nvm_region_ops, 1,
+ nvm_size);
+ if (IS_ERR(pf->nvm_region)) {
+ dev_err(dev, "failed to create NVM devlink region, err %ld\n",
+ PTR_ERR(pf->nvm_region));
+ pf->nvm_region = NULL;
+ }
+
+ sram_size = pf->hw.flash.sr_words * 2u;
+ pf->sram_region = devlink_region_create(devlink, &ice_sram_region_ops,
+ 1, sram_size);
+ if (IS_ERR(pf->sram_region)) {
+ dev_err(dev, "failed to create shadow-ram devlink region, err %ld\n",
+ PTR_ERR(pf->sram_region));
+ pf->sram_region = NULL;
+ }
+
+ pf->devcaps_region = devlink_region_create(devlink,
+ &ice_devcaps_region_ops, 10,
+ ICE_AQ_MAX_BUF_LEN);
+ if (IS_ERR(pf->devcaps_region)) {
+ dev_err(dev, "failed to create device-caps devlink region, err %ld\n",
+ PTR_ERR(pf->devcaps_region));
+ pf->devcaps_region = NULL;
+ }
+}
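+
+/* Note: the third argument to devlink_region_create() above is the
+ * maximum number of snapshots devlink retains per region - one for the
+ * large NVM and Shadow RAM regions, ten for the small device-caps
+ * region.
+ */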
+
+/**
+ * ice_devlink_destroy_regions - Destroy devlink regions
+ * @pf: the PF device structure
+ *
+ * Remove previously created regions for this PF.
+ */
+void ice_devlink_destroy_regions(struct ice_pf *pf)
+{
+ if (pf->nvm_region)
+ devlink_region_destroy(pf->nvm_region);
+
+ if (pf->sram_region)
+ devlink_region_destroy(pf->sram_region);
+
+ if (pf->devcaps_region)
+ devlink_region_destroy(pf->devcaps_region);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h
new file mode 100644
index 000000000..fe006d994
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DEVLINK_H_
+#define _ICE_DEVLINK_H_
+
+struct ice_pf *ice_allocate_pf(struct device *dev);
+
+void ice_devlink_register(struct ice_pf *pf);
+void ice_devlink_unregister(struct ice_pf *pf);
+int ice_devlink_register_params(struct ice_pf *pf);
+void ice_devlink_unregister_params(struct ice_pf *pf);
+int ice_devlink_create_pf_port(struct ice_pf *pf);
+void ice_devlink_destroy_pf_port(struct ice_pf *pf);
+int ice_devlink_create_vf_port(struct ice_vf *vf);
+void ice_devlink_destroy_vf_port(struct ice_vf *vf);
+
+void ice_devlink_init_regions(struct ice_pf *pf);
+void ice_devlink_destroy_regions(struct ice_pf *pf);
+
+#endif /* _ICE_DEVLINK_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
new file mode 100644
index 000000000..7de4a8a4b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -0,0 +1,717 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_eswitch.h"
+#include "ice_fltr.h"
+#include "ice_repr.h"
+#include "ice_devlink.h"
+#include "ice_tc_lib.h"
+
+/**
+ * ice_eswitch_add_vf_mac_rule - add adv rule with VF's MAC
+ * @pf: pointer to PF struct
+ * @vf: pointer to VF struct
+ * @mac: VF's MAC address
+ *
+ * This function adds an advanced rule that forwards packets with the
+ * VF's MAC address (src MAC) to the corresponding switchdev ctrl VSI queue.
+ */
+int
+ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac)
+{
+ struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ struct ice_adv_rule_info rule_info = { 0 };
+ struct ice_adv_lkup_elem *list;
+ struct ice_hw *hw = &pf->hw;
+ const u16 lkups_cnt = 1;
+ int err;
+
+ list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+ if (!list)
+ return -ENOMEM;
+
+ list[0].type = ICE_MAC_OFOS;
+ ether_addr_copy(list[0].h_u.eth_hdr.src_addr, mac);
+ eth_broadcast_addr(list[0].m_u.eth_hdr.src_addr);
+
+ rule_info.sw_act.flag |= ICE_FLTR_TX;
+ rule_info.sw_act.vsi_handle = ctrl_vsi->idx;
+ rule_info.sw_act.fltr_act = ICE_FWD_TO_Q;
+ rule_info.rx = false;
+ rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id +
+ ctrl_vsi->rxq_map[vf->vf_id];
+ rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE;
+ rule_info.flags_info.act_valid = true;
+ rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN;
+
+ err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info,
+ vf->repr->mac_rule);
+ if (err)
+ dev_err(ice_pf_to_dev(pf), "Unable to add VF mac rule in switchdev mode for VF %d",
+ vf->vf_id);
+ else
+ vf->repr->rule_added = true;
+
+ kfree(list);
+ return err;
+}
+
+/**
+ * ice_eswitch_replay_vf_mac_rule - replay adv rule with VF's MAC
+ * @vf: pointer to VF struct
+ *
+ * This function replays VF's MAC rule after reset.
+ */
+void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf)
+{
+ int err;
+
+ if (!ice_is_switchdev_running(vf->pf))
+ return;
+
+ if (is_valid_ether_addr(vf->hw_lan_addr.addr)) {
+ err = ice_eswitch_add_vf_mac_rule(vf->pf, vf,
+ vf->hw_lan_addr.addr);
+ if (err) {
+ dev_err(ice_pf_to_dev(vf->pf), "Failed to add MAC %pM for VF %d, error %d\n",
+ vf->hw_lan_addr.addr, vf->vf_id, err);
+ return;
+ }
+ vf->num_mac++;
+
+ ether_addr_copy(vf->dev_lan_addr.addr, vf->hw_lan_addr.addr);
+ }
+}
+
+/**
+ * ice_eswitch_del_vf_mac_rule - delete adv rule with VF's MAC
+ * @vf: pointer to the VF struct
+ *
+ * Delete the advanced rule that was used to forward packets with the VF's MAC
+ * address (src MAC) to the corresponding switchdev ctrl VSI queue.
+ */
+void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf)
+{
+ if (!ice_is_switchdev_running(vf->pf))
+ return;
+
+ if (!vf->repr->rule_added)
+ return;
+
+ ice_rem_adv_rule_by_id(&vf->pf->hw, vf->repr->mac_rule);
+ vf->repr->rule_added = false;
+}
+
+/**
+ * ice_eswitch_setup_env - configure switchdev HW filters
+ * @pf: pointer to PF struct
+ *
+ * This function adds HW filters configuration specific for switchdev
+ * mode.
+ */
+static int ice_eswitch_setup_env(struct ice_pf *pf)
+{
+ struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi;
+ struct net_device *uplink_netdev = uplink_vsi->netdev;
+ struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ struct ice_vsi_vlan_ops *vlan_ops;
+ bool rule_added = false;
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(ctrl_vsi);
+ if (vlan_ops->dis_stripping(ctrl_vsi))
+ return -ENODEV;
+
+ ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx);
+
+ netif_addr_lock_bh(uplink_netdev);
+ __dev_uc_unsync(uplink_netdev, NULL);
+ __dev_mc_unsync(uplink_netdev, NULL);
+ netif_addr_unlock_bh(uplink_netdev);
+
+ if (ice_vsi_add_vlan_zero(uplink_vsi))
+ goto err_def_rx;
+
+ if (!ice_is_dflt_vsi_in_use(uplink_vsi->port_info)) {
+ if (ice_set_dflt_vsi(uplink_vsi))
+ goto err_def_rx;
+ rule_added = true;
+ }
+
+ if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override))
+ goto err_override_uplink;
+
+ if (ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_set_allow_override))
+ goto err_override_control;
+
+ return 0;
+
+err_override_control:
+ ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
+err_override_uplink:
+ if (rule_added)
+ ice_clear_dflt_vsi(uplink_vsi);
+err_def_rx:
+ ice_fltr_add_mac_and_broadcast(uplink_vsi,
+ uplink_vsi->port_info->mac.perm_addr,
+ ICE_FWD_TO_VSI);
+ return -ENODEV;
+}
+
+/**
+ * ice_eswitch_remap_rings_to_vectors - reconfigure rings of switchdev ctrl VSI
+ * @pf: pointer to PF struct
+ *
+ * In switchdev mode, the number of allocated Tx and Rx rings is equal.
+ *
+ * This function fills the q_vector structures associated with each
+ * representor and moves each ring pair to the port representor netdevs.
+ * Each port representor gets one dedicated Tx/Rx ring pair, so the number
+ * of ring pairs equals the number of VFs.
+ */
+static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf)
+{
+ struct ice_vsi *vsi = pf->switchdev.control_vsi;
+ int q_id;
+
+ ice_for_each_txq(vsi, q_id) {
+ struct ice_q_vector *q_vector;
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+ struct ice_repr *repr;
+ struct ice_vf *vf;
+
+ vf = ice_get_vf_by_id(pf, q_id);
+ if (WARN_ON(!vf))
+ continue;
+
+ repr = vf->repr;
+ q_vector = repr->q_vector;
+ tx_ring = vsi->tx_rings[q_id];
+ rx_ring = vsi->rx_rings[q_id];
+
+ q_vector->vsi = vsi;
+ q_vector->reg_idx = vsi->q_vectors[0]->reg_idx;
+
+ q_vector->num_ring_tx = 1;
+ q_vector->tx.tx_ring = tx_ring;
+ tx_ring->q_vector = q_vector;
+ tx_ring->next = NULL;
+ tx_ring->netdev = repr->netdev;
+ /* In switchdev mode, from OS stack perspective, there is only
+ * one queue for given netdev, so it needs to be indexed as 0.
+ */
+ tx_ring->q_index = 0;
+
+ q_vector->num_ring_rx = 1;
+ q_vector->rx.rx_ring = rx_ring;
+ rx_ring->q_vector = q_vector;
+ rx_ring->next = NULL;
+ rx_ring->netdev = repr->netdev;
+
+ ice_put_vf(vf);
+ }
+}
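+
+/* Example: with three VFs the control VSI owns three Tx/Rx ring pairs,
+ * and the loop above hands ring pair q_id to the representor of VF
+ * q_id, so each representor netdev exposes exactly one queue.
+ */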
+
+/**
+ * ice_eswitch_release_reprs - clear PR VSIs configuration
+ * @pf: pointer to PF struct
+ * @ctrl_vsi: pointer to switchdev control VSI
+ */
+static void
+ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ ice_for_each_vf(pf, bkt, vf) {
+ struct ice_vsi *vsi = vf->repr->src_vsi;
+
+ /* Skip VFs that aren't configured */
+ if (!vf->repr->dst)
+ continue;
+
+ ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
+ metadata_dst_free(vf->repr->dst);
+ vf->repr->dst = NULL;
+ ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr,
+ ICE_FWD_TO_VSI);
+
+ netif_napi_del(&vf->repr->q_vector->napi);
+ }
+}
+
+/**
+ * ice_eswitch_setup_reprs - configure port reprs to run in switchdev mode
+ * @pf: pointer to PF struct
+ */
+static int ice_eswitch_setup_reprs(struct ice_pf *pf)
+{
+ struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ int max_vsi_num = 0;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ ice_for_each_vf(pf, bkt, vf) {
+ struct ice_vsi *vsi = vf->repr->src_vsi;
+
+ ice_remove_vsi_fltr(&pf->hw, vsi->idx);
+ vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
+ GFP_KERNEL);
+ if (!vf->repr->dst) {
+ ice_fltr_add_mac_and_broadcast(vsi,
+ vf->hw_lan_addr.addr,
+ ICE_FWD_TO_VSI);
+ goto err;
+ }
+
+ if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) {
+ ice_fltr_add_mac_and_broadcast(vsi,
+ vf->hw_lan_addr.addr,
+ ICE_FWD_TO_VSI);
+ metadata_dst_free(vf->repr->dst);
+ vf->repr->dst = NULL;
+ goto err;
+ }
+
+ if (ice_vsi_add_vlan_zero(vsi)) {
+ ice_fltr_add_mac_and_broadcast(vsi,
+ vf->hw_lan_addr.addr,
+ ICE_FWD_TO_VSI);
+ metadata_dst_free(vf->repr->dst);
+ vf->repr->dst = NULL;
+ ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
+ goto err;
+ }
+
+ if (max_vsi_num < vsi->vsi_num)
+ max_vsi_num = vsi->vsi_num;
+
+ netif_napi_add(vf->repr->netdev, &vf->repr->q_vector->napi,
+ ice_napi_poll);
+
+ netif_keep_dst(vf->repr->netdev);
+ }
+
+ ice_for_each_vf(pf, bkt, vf) {
+ struct ice_repr *repr = vf->repr;
+ struct ice_vsi *vsi = repr->src_vsi;
+ struct metadata_dst *dst;
+
+ dst = repr->dst;
+ dst->u.port_info.port_id = vsi->vsi_num;
+ dst->u.port_info.lower_dev = repr->netdev;
+ ice_repr_set_traffic_vsi(repr, ctrl_vsi);
+ }
+
+ return 0;
+
+err:
+ ice_eswitch_release_reprs(pf, ctrl_vsi);
+
+ return -ENODEV;
+}
+
+/**
+ * ice_eswitch_update_repr - reconfigure VF port representor
+ * @vsi: VF VSI for which port representor is configured
+ */
+void ice_eswitch_update_repr(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_repr *repr;
+ struct ice_vf *vf;
+ int ret;
+
+ if (!ice_is_switchdev_running(pf))
+ return;
+
+ vf = vsi->vf;
+ repr = vf->repr;
+ repr->src_vsi = vsi;
+ repr->dst->u.port_info.port_id = vsi->vsi_num;
+
+ ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
+ if (ret) {
+ ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, ICE_FWD_TO_VSI);
+ dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor",
+ vsi->vf->vf_id);
+ }
+}
+
+/**
+ * ice_eswitch_port_start_xmit - callback for packet transmit
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t
+ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct ice_netdev_priv *np;
+ struct ice_repr *repr;
+ struct ice_vsi *vsi;
+
+ np = netdev_priv(netdev);
+ vsi = np->vsi;
+
+ if (!vsi || !ice_is_switchdev_running(vsi->back))
+ return NETDEV_TX_BUSY;
+
+ if (ice_is_reset_in_progress(vsi->back->state) ||
+ test_bit(ICE_VF_DIS, vsi->back->state))
+ return NETDEV_TX_BUSY;
+
+ repr = ice_netdev_to_repr(netdev);
+ skb_dst_drop(skb);
+ dst_hold((struct dst_entry *)repr->dst);
+ skb_dst_set(skb, (struct dst_entry *)repr->dst);
+ skb->queue_mapping = repr->vf->vf_id;
+
+ return ice_start_xmit(skb, netdev);
+}
+
+/**
+ * ice_eswitch_set_target_vsi - set switchdev context in Tx context descriptor
+ * @skb: pointer to send buffer
+ * @off: pointer to offload struct
+ */
+void
+ice_eswitch_set_target_vsi(struct sk_buff *skb,
+ struct ice_tx_offload_params *off)
+{
+ struct metadata_dst *dst = skb_metadata_dst(skb);
+ u64 cd_cmd, dst_vsi;
+
+ if (!dst) {
+ cd_cmd = ICE_TX_CTX_DESC_SWTCH_UPLINK << ICE_TXD_CTX_QW1_CMD_S;
+ off->cd_qw1 |= (cd_cmd | ICE_TX_DESC_DTYPE_CTX);
+ } else {
+ cd_cmd = ICE_TX_CTX_DESC_SWTCH_VSI << ICE_TXD_CTX_QW1_CMD_S;
+ dst_vsi = ((u64)dst->u.port_info.port_id <<
+ ICE_TXD_CTX_QW1_VSI_S) & ICE_TXD_CTX_QW1_VSI_M;
+ off->cd_qw1 = cd_cmd | dst_vsi | ICE_TX_DESC_DTYPE_CTX;
+ }
+}
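+
+/* Packets sent through a representor carry the metadata dst installed in
+ * ice_eswitch_port_start_xmit() above, so they take the SWTCH_VSI branch
+ * and are steered to the VF's VSI; packets without a dst leave through
+ * the uplink.
+ */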
+
+/**
+ * ice_eswitch_release_env - clear switchdev HW filters
+ * @pf: pointer to PF struct
+ *
+ * This function removes HW filters configuration specific for switchdev
+ * mode and restores default legacy mode settings.
+ */
+static void ice_eswitch_release_env(struct ice_pf *pf)
+{
+ struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi;
+ struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+
+ ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
+ ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
+ ice_clear_dflt_vsi(uplink_vsi);
+ ice_fltr_add_mac_and_broadcast(uplink_vsi,
+ uplink_vsi->port_info->mac.perm_addr,
+ ICE_FWD_TO_VSI);
+}
+
+/**
+ * ice_eswitch_vsi_setup - configure switchdev control VSI
+ * @pf: pointer to PF structure
+ * @pi: pointer to port_info structure
+ */
+static struct ice_vsi *
+ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+ return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, NULL, NULL);
+}
+
+/**
+ * ice_eswitch_napi_del - remove NAPI handle for all port representors
+ * @pf: pointer to PF structure
+ */
+static void ice_eswitch_napi_del(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ ice_for_each_vf(pf, bkt, vf)
+ netif_napi_del(&vf->repr->q_vector->napi);
+}
+
+/**
+ * ice_eswitch_napi_enable - enable NAPI for all port representors
+ * @pf: pointer to PF structure
+ */
+static void ice_eswitch_napi_enable(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ ice_for_each_vf(pf, bkt, vf)
+ napi_enable(&vf->repr->q_vector->napi);
+}
+
+/**
+ * ice_eswitch_napi_disable - disable NAPI for all port representors
+ * @pf: pointer to PF structure
+ */
+static void ice_eswitch_napi_disable(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ ice_for_each_vf(pf, bkt, vf)
+ napi_disable(&vf->repr->q_vector->napi);
+}
+
+/**
+ * ice_eswitch_enable_switchdev - configure eswitch in switchdev mode
+ * @pf: pointer to PF structure
+ */
+static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
+{
+ struct ice_vsi *ctrl_vsi;
+
+ pf->switchdev.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info);
+ if (!pf->switchdev.control_vsi)
+ return -ENODEV;
+
+ ctrl_vsi = pf->switchdev.control_vsi;
+ pf->switchdev.uplink_vsi = ice_get_main_vsi(pf);
+ if (!pf->switchdev.uplink_vsi)
+ goto err_vsi;
+
+ if (ice_eswitch_setup_env(pf))
+ goto err_vsi;
+
+ if (ice_repr_add_for_all_vfs(pf))
+ goto err_repr_add;
+
+ if (ice_eswitch_setup_reprs(pf))
+ goto err_setup_reprs;
+
+ ice_eswitch_remap_rings_to_vectors(pf);
+
+ if (ice_vsi_open(ctrl_vsi))
+ goto err_setup_reprs;
+
+ ice_eswitch_napi_enable(pf);
+
+ return 0;
+
+err_setup_reprs:
+ ice_repr_rem_from_all_vfs(pf);
+err_repr_add:
+ ice_eswitch_release_env(pf);
+err_vsi:
+ ice_vsi_release(ctrl_vsi);
+ return -ENODEV;
+}
+
+/**
+ * ice_eswitch_disable_switchdev - disable switchdev resources
+ * @pf: pointer to PF structure
+ */
+static void ice_eswitch_disable_switchdev(struct ice_pf *pf)
+{
+ struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+
+ ice_eswitch_napi_disable(pf);
+ ice_eswitch_release_env(pf);
+ ice_rem_adv_rule_for_vsi(&pf->hw, ctrl_vsi->idx);
+ ice_eswitch_release_reprs(pf, ctrl_vsi);
+ ice_vsi_release(ctrl_vsi);
+ ice_repr_rem_from_all_vfs(pf);
+}
+
+/**
+ * ice_eswitch_mode_set - set new eswitch mode
+ * @devlink: pointer to devlink structure
+ * @mode: eswitch mode to switch to
+ * @extack: pointer to extack structure
+ */
+int
+ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+
+ if (pf->eswitch_mode == mode)
+ return 0;
+
+ if (ice_has_vfs(pf)) {
+ dev_info(ice_pf_to_dev(pf), "Changing eswitch mode is allowed only if there are no VFs created");
+ NL_SET_ERR_MSG_MOD(extack, "Changing eswitch mode is allowed only if there are no VFs created");
+ return -EOPNOTSUPP;
+ }
+
+ switch (mode) {
+ case DEVLINK_ESWITCH_MODE_LEGACY:
+ dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to legacy",
+ pf->hw.pf_id);
+ NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to legacy");
+ break;
+ case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+ {
+ if (ice_is_adq_active(pf)) {
+ dev_err(ice_pf_to_dev(pf), "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
+ return -EOPNOTSUPP;
+ }
+
+ dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev",
+ pf->hw.pf_id);
+ NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev");
+ break;
+ }
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unknown eswitch mode");
+ return -EINVAL;
+ }
+
+ pf->eswitch_mode = mode;
+ return 0;
+}
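+
+/* Example invocation (hypothetical PCI address):
+ *   devlink dev eswitch set pci/0000:01:00.0 mode switchdev
+ * Mode changes are rejected while any VFs exist, as checked above.
+ */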
+
+/**
+ * ice_eswitch_mode_get - get current eswitch mode
+ * @devlink: pointer to devlink structure
+ * @mode: output parameter for current eswitch mode
+ */
+int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+
+ *mode = pf->eswitch_mode;
+ return 0;
+}
+
+/**
+ * ice_is_eswitch_mode_switchdev - check if eswitch mode is set to switchdev
+ * @pf: pointer to PF structure
+ *
+ * Returns true if eswitch mode is set to DEVLINK_ESWITCH_MODE_SWITCHDEV,
+ * false otherwise.
+ */
+bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf)
+{
+ return pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV;
+}
+
+/**
+ * ice_eswitch_release - cleanup eswitch
+ * @pf: pointer to PF structure
+ */
+void ice_eswitch_release(struct ice_pf *pf)
+{
+ if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY)
+ return;
+
+ ice_eswitch_disable_switchdev(pf);
+ pf->switchdev.is_running = false;
+}
+
+/**
+ * ice_eswitch_configure - configure eswitch
+ * @pf: pointer to PF structure
+ */
+int ice_eswitch_configure(struct ice_pf *pf)
+{
+ int status;
+
+ if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY || pf->switchdev.is_running)
+ return 0;
+
+ status = ice_eswitch_enable_switchdev(pf);
+ if (status)
+ return status;
+
+ pf->switchdev.is_running = true;
+ return 0;
+}
+
+/**
+ * ice_eswitch_start_all_tx_queues - start Tx queues of all port representors
+ * @pf: pointer to PF structure
+ */
+static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ if (test_bit(ICE_DOWN, pf->state))
+ return;
+
+ ice_for_each_vf(pf, bkt, vf) {
+ if (vf->repr)
+ ice_repr_start_tx_queues(vf->repr);
+ }
+}
+
+/**
+ * ice_eswitch_stop_all_tx_queues - stop Tx queues of all port representors
+ * @pf: pointer to PF structure
+ */
+void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ if (test_bit(ICE_DOWN, pf->state))
+ return;
+
+ ice_for_each_vf(pf, bkt, vf) {
+ if (vf->repr)
+ ice_repr_stop_tx_queues(vf->repr);
+ }
+}
+
+/**
+ * ice_eswitch_rebuild - rebuild eswitch
+ * @pf: pointer to PF structure
+ */
+int ice_eswitch_rebuild(struct ice_pf *pf)
+{
+ struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ int status;
+
+ ice_eswitch_napi_disable(pf);
+ ice_eswitch_napi_del(pf);
+
+ status = ice_eswitch_setup_env(pf);
+ if (status)
+ return status;
+
+ status = ice_eswitch_setup_reprs(pf);
+ if (status)
+ return status;
+
+ ice_eswitch_remap_rings_to_vectors(pf);
+
+ ice_replay_tc_fltrs(pf);
+
+ status = ice_vsi_open(ctrl_vsi);
+ if (status)
+ return status;
+
+ ice_eswitch_napi_enable(pf);
+ ice_eswitch_start_all_tx_queues(pf);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h
new file mode 100644
index 000000000..6a4133315
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_ESWITCH_H_
+#define _ICE_ESWITCH_H_
+
+#include <net/devlink.h>
+
+#ifdef CONFIG_ICE_SWITCHDEV
+void ice_eswitch_release(struct ice_pf *pf);
+int ice_eswitch_configure(struct ice_pf *pf);
+int ice_eswitch_rebuild(struct ice_pf *pf);
+
+int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+int
+ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack);
+bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf);
+
+void ice_eswitch_update_repr(struct ice_vsi *vsi);
+
+void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf);
+int
+ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf,
+ const u8 *mac);
+void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf);
+void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf);
+
+void ice_eswitch_set_target_vsi(struct sk_buff *skb,
+ struct ice_tx_offload_params *off);
+netdev_tx_t
+ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+#else /* CONFIG_ICE_SWITCHDEV */
+static inline void ice_eswitch_release(struct ice_pf *pf) { }
+
+static inline void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { }
+static inline void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf) { }
+static inline void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf) { }
+
+static inline int
+ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf,
+ const u8 *mac)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void
+ice_eswitch_set_target_vsi(struct sk_buff *skb,
+ struct ice_tx_offload_params *off) { }
+
+static inline void ice_eswitch_update_repr(struct ice_vsi *vsi) { }
+
+static inline int ice_eswitch_configure(struct ice_pf *pf)
+{
+ return 0;
+}
+
+static inline int ice_eswitch_rebuild(struct ice_pf *pf)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+ return DEVLINK_ESWITCH_MODE_LEGACY;
+}
+
+static inline int
+ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf)
+{
+ return false;
+}
+
+static inline netdev_tx_t
+ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ return NETDEV_TX_BUSY;
+}
+#endif /* CONFIG_ICE_SWITCHDEV */
+#endif /* _ICE_ESWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
new file mode 100644
index 000000000..02eb78df2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -0,0 +1,4285 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* ethtool support for ice */
+
+#include "ice.h"
+#include "ice_flow.h"
+#include "ice_fltr.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
+#include <net/dcbnl.h>
+
+struct ice_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ int sizeof_stat;
+ int stat_offset;
+};
+
+#define ICE_STAT(_type, _name, _stat) { \
+ .stat_string = _name, \
+ .sizeof_stat = sizeof_field(_type, _stat), \
+ .stat_offset = offsetof(_type, _stat) \
+}
+
+#define ICE_VSI_STAT(_name, _stat) \
+ ICE_STAT(struct ice_vsi, _name, _stat)
+#define ICE_PF_STAT(_name, _stat) \
+ ICE_STAT(struct ice_pf, _name, _stat)
+
+static int ice_q_stats_len(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) *
+ (sizeof(struct ice_q_stats) / sizeof(u64)));
+}
+
+#define ICE_PF_STATS_LEN ARRAY_SIZE(ice_gstrings_pf_stats)
+#define ICE_VSI_STATS_LEN ARRAY_SIZE(ice_gstrings_vsi_stats)
+
+#define ICE_PFC_STATS_LEN ( \
+ (sizeof_field(struct ice_pf, stats.priority_xoff_rx) + \
+ sizeof_field(struct ice_pf, stats.priority_xon_rx) + \
+ sizeof_field(struct ice_pf, stats.priority_xoff_tx) + \
+ sizeof_field(struct ice_pf, stats.priority_xon_tx)) \
+ / sizeof(u64))
+#define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_PFC_STATS_LEN + \
+ ICE_VSI_STATS_LEN + ice_q_stats_len(n))
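+
+/* For example, a VSI with 8 Tx and 8 Rx queues contributes
+ * 16 * (sizeof(struct ice_q_stats) / sizeof(u64)) queue counters on top
+ * of the fixed PF, PFC and VSI stat arrays.
+ */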
+
+static const struct ice_stats ice_gstrings_vsi_stats[] = {
+ ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
+ ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
+ ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
+ ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
+ ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
+ ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
+ ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes),
+ ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+ ICE_VSI_STAT("rx_dropped", eth_stats.rx_discards),
+ ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
+ ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
+ ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
+ ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
+ ICE_VSI_STAT("tx_linearize", tx_linearize),
+ ICE_VSI_STAT("tx_busy", tx_busy),
+ ICE_VSI_STAT("tx_restart", tx_restart),
+};
+
+enum ice_ethtool_test_id {
+ ICE_ETH_TEST_REG = 0,
+ ICE_ETH_TEST_EEPROM,
+ ICE_ETH_TEST_INTR,
+ ICE_ETH_TEST_LOOP,
+ ICE_ETH_TEST_LINK,
+};
+
+static const char ice_gstrings_test[][ETH_GSTRING_LEN] = {
+ "Register test (offline)",
+ "EEPROM test (offline)",
+ "Interrupt test (offline)",
+ "Loopback test (offline)",
+ "Link test (on/offline)",
+};
+
+#define ICE_TEST_LEN (sizeof(ice_gstrings_test) / ETH_GSTRING_LEN)
+
+/* These PF_STATs might look like duplicates of some NETDEV_STATs,
+ * but they aren't. This device is capable of supporting multiple
+ * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual
+ * netdevs whereas the PF_STATs are for the physical function that's
+ * hosting these netdevs.
+ *
+ * The PF_STATs are appended to the netdev stats only when ethtool -S
+ * is queried on the base PF netdev.
+ */
+static const struct ice_stats ice_gstrings_pf_stats[] = {
+ ICE_PF_STAT("rx_bytes.nic", stats.eth.rx_bytes),
+ ICE_PF_STAT("tx_bytes.nic", stats.eth.tx_bytes),
+ ICE_PF_STAT("rx_unicast.nic", stats.eth.rx_unicast),
+ ICE_PF_STAT("tx_unicast.nic", stats.eth.tx_unicast),
+ ICE_PF_STAT("rx_multicast.nic", stats.eth.rx_multicast),
+ ICE_PF_STAT("tx_multicast.nic", stats.eth.tx_multicast),
+ ICE_PF_STAT("rx_broadcast.nic", stats.eth.rx_broadcast),
+ ICE_PF_STAT("tx_broadcast.nic", stats.eth.tx_broadcast),
+ ICE_PF_STAT("tx_errors.nic", stats.eth.tx_errors),
+ ICE_PF_STAT("tx_timeout.nic", tx_timeout_count),
+ ICE_PF_STAT("rx_size_64.nic", stats.rx_size_64),
+ ICE_PF_STAT("tx_size_64.nic", stats.tx_size_64),
+ ICE_PF_STAT("rx_size_127.nic", stats.rx_size_127),
+ ICE_PF_STAT("tx_size_127.nic", stats.tx_size_127),
+ ICE_PF_STAT("rx_size_255.nic", stats.rx_size_255),
+ ICE_PF_STAT("tx_size_255.nic", stats.tx_size_255),
+ ICE_PF_STAT("rx_size_511.nic", stats.rx_size_511),
+ ICE_PF_STAT("tx_size_511.nic", stats.tx_size_511),
+ ICE_PF_STAT("rx_size_1023.nic", stats.rx_size_1023),
+ ICE_PF_STAT("tx_size_1023.nic", stats.tx_size_1023),
+ ICE_PF_STAT("rx_size_1522.nic", stats.rx_size_1522),
+ ICE_PF_STAT("tx_size_1522.nic", stats.tx_size_1522),
+ ICE_PF_STAT("rx_size_big.nic", stats.rx_size_big),
+ ICE_PF_STAT("tx_size_big.nic", stats.tx_size_big),
+ ICE_PF_STAT("link_xon_rx.nic", stats.link_xon_rx),
+ ICE_PF_STAT("link_xon_tx.nic", stats.link_xon_tx),
+ ICE_PF_STAT("link_xoff_rx.nic", stats.link_xoff_rx),
+ ICE_PF_STAT("link_xoff_tx.nic", stats.link_xoff_tx),
+ ICE_PF_STAT("tx_dropped_link_down.nic", stats.tx_dropped_link_down),
+ ICE_PF_STAT("rx_undersize.nic", stats.rx_undersize),
+ ICE_PF_STAT("rx_fragments.nic", stats.rx_fragments),
+ ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize),
+ ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber),
+ ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error),
+ ICE_PF_STAT("rx_length_errors.nic", stats.rx_len_errors),
+ ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards),
+ ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors),
+ ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes),
+ ICE_PF_STAT("mac_local_faults.nic", stats.mac_local_faults),
+ ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults),
+ ICE_PF_STAT("fdir_sb_match.nic", stats.fd_sb_match),
+ ICE_PF_STAT("fdir_sb_status.nic", stats.fd_sb_status),
+ ICE_PF_STAT("tx_hwtstamp_skipped", ptp.tx_hwtstamp_skipped),
+ ICE_PF_STAT("tx_hwtstamp_timeouts", ptp.tx_hwtstamp_timeouts),
+ ICE_PF_STAT("tx_hwtstamp_flushed", ptp.tx_hwtstamp_flushed),
+ ICE_PF_STAT("tx_hwtstamp_discarded", ptp.tx_hwtstamp_discarded),
+ ICE_PF_STAT("late_cached_phc_updates", ptp.late_cached_phc_updates),
+};
+
+static const u32 ice_regs_dump_list[] = {
+ PFGEN_STATE,
+ PRTGEN_STATUS,
+ QRX_CTRL(0),
+ QINT_TQCTL(0),
+ QINT_RQCTL(0),
+ PFINT_OICR_ENA,
+ QRX_ITR(0),
+};
+
+struct ice_priv_flag {
+ char name[ETH_GSTRING_LEN];
+ u32 bitno; /* bit position in pf->flags */
+};
+
+#define ICE_PRIV_FLAG(_name, _bitno) { \
+ .name = _name, \
+ .bitno = _bitno, \
+}
+
+static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
+ ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
+ ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+ ICE_PRIV_FLAG("vf-true-promisc-support",
+ ICE_FLAG_VF_TRUE_PROMISC_ENA),
+ ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF),
+ ICE_PRIV_FLAG("vf-vlan-pruning", ICE_FLAG_VF_VLAN_PRUNING),
+ ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
+};
+
+#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags)
+
+static void
+__ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo,
+ struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_orom_info *orom;
+ struct ice_nvm_info *nvm;
+
+ nvm = &hw->flash.nvm;
+ orom = &hw->flash.orom;
+
+ strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+
+ /* Display the NVM version (from which the firmware version can be
+ * determined), which contains the more pertinent information.
+ */
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%x.%02x 0x%x %d.%d.%d", nvm->major, nvm->minor,
+ nvm->eetrack, orom->major, orom->build, orom->patch);
+
+ strscpy(drvinfo->bus_info, pci_name(pf->pdev),
+ sizeof(drvinfo->bus_info));
+}
+
+static void
+ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ __ice_get_drvinfo(netdev, drvinfo, np->vsi);
+ drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
+}
+
+static int ice_get_regs_len(struct net_device __always_unused *netdev)
+{
+ return sizeof(ice_regs_dump_list);
+}
+
+static void
+ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 *regs_buf = (u32 *)p;
+ unsigned int i;
+
+ regs->version = 1;
+
+ for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list); ++i)
+ regs_buf[i] = rd32(hw, ice_regs_dump_list[i]);
+}
+
+static u32 ice_get_msglevel(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+ if (pf->hw.debug_mask)
+ netdev_info(netdev, "hw debug_mask: 0x%llX\n",
+ pf->hw.debug_mask);
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+
+ return pf->msg_enable;
+}
+
+static void ice_set_msglevel(struct net_device *netdev, u32 data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+ if (ICE_DBG_USER & data)
+ pf->hw.debug_mask = data;
+ else
+ pf->msg_enable = data;
+#else
+ pf->msg_enable = data;
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+}
+
+static int ice_get_eeprom_len(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+ return (int)pf->hw.flash.flash_size;
+}
+
+static int
+ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
+ u8 *bytes)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ struct device *dev;
+ int ret;
+ u8 *buf;
+
+ dev = ice_pf_to_dev(pf);
+
+ eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+ netdev_dbg(netdev, "GEEPROM cmd 0x%08x, offset 0x%08x, len 0x%08x\n",
+ eeprom->cmd, eeprom->offset, eeprom->len);
+
+ buf = kzalloc(eeprom->len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ ret = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (ret) {
+ dev_err(dev, "ice_acquire_nvm failed, err %d aq_err %s\n",
+ ret, ice_aq_str(hw->adminq.sq_last_status));
+ goto out;
+ }
+
+ ret = ice_read_flat_nvm(hw, eeprom->offset, &eeprom->len, buf,
+ false);
+ if (ret) {
+ dev_err(dev, "ice_read_flat_nvm failed, err %d aq_err %s\n",
+ ret, ice_aq_str(hw->adminq.sq_last_status));
+ goto release;
+ }
+
+ memcpy(bytes, buf, eeprom->len);
+release:
+ ice_release_nvm(hw);
+out:
+ kfree(buf);
+ return ret;
+}
+
+/**
+ * ice_active_vfs - check if there are any active VFs
+ * @pf: board private structure
+ *
+ * Returns true if an active VF is found, otherwise returns false
+ */
+static bool ice_active_vfs(struct ice_pf *pf)
+{
+ bool active = false;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ rcu_read_lock();
+ ice_for_each_vf_rcu(pf, bkt, vf) {
+ if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ active = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return active;
+}
+
+/**
+ * ice_link_test - perform a link test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_link_test(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ bool link_up = false;
+ int status;
+
+ netdev_info(netdev, "link test\n");
+ status = ice_get_link_status(np->vsi->port_info, &link_up);
+ if (status) {
+ netdev_err(netdev, "link query error, status = %d\n",
+ status);
+ return 1;
+ }
+
+ if (!link_up)
+ return 2;
+
+ return 0;
+}
+
+/**
+ * ice_eeprom_test - perform an EEPROM test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_eeprom_test(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+ netdev_info(netdev, "EEPROM test\n");
+ return !!(ice_nvm_validate_checksum(&pf->hw));
+}
+
+/**
+ * ice_reg_pattern_test
+ * @hw: pointer to the HW struct
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ */
+static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
+{
+ struct ice_pf *pf = (struct ice_pf *)hw->back;
+ struct device *dev = ice_pf_to_dev(pf);
+ static const u32 patterns[] = {
+ 0x5A5A5A5A, 0xA5A5A5A5,
+ 0x00000000, 0xFFFFFFFF
+ };
+ u32 val, orig_val;
+ unsigned int i;
+
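+ /* Save the current value, write each pattern masked to the writable
+ * bits and read it back, then restore and verify the original value.
+ */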
+ orig_val = rd32(hw, reg);
+ for (i = 0; i < ARRAY_SIZE(patterns); ++i) {
+ u32 pattern = patterns[i] & mask;
+
+ wr32(hw, reg, pattern);
+ val = rd32(hw, reg);
+ if (val == pattern)
+ continue;
+ dev_err(dev, "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
+ , __func__, reg, pattern, val);
+ return 1;
+ }
+
+ wr32(hw, reg, orig_val);
+ val = rd32(hw, reg);
+ if (val != orig_val) {
+ dev_err(dev, "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
+ , __func__, reg, orig_val, val);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_reg_test - perform a register test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_reg_test(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_hw *hw = np->vsi->port_info->hw;
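+ /* Number of per-vector ITR registers to exercise: one less than the
+ * reported MSI-X vector count, or a single element if none is reported.
+ */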
+ u32 int_elements = hw->func_caps.common_cap.num_msix_vectors ?
+ hw->func_caps.common_cap.num_msix_vectors - 1 : 1;
+ struct ice_diag_reg_test_info {
+ u32 address;
+ u32 mask;
+ u32 elem_num;
+ u32 elem_size;
+ } ice_reg_list[] = {
+ {GLINT_ITR(0, 0), 0x00000fff, int_elements,
+ GLINT_ITR(0, 1) - GLINT_ITR(0, 0)},
+ {GLINT_ITR(1, 0), 0x00000fff, int_elements,
+ GLINT_ITR(1, 1) - GLINT_ITR(1, 0)},
+ {GLINT_ITR(2, 0), 0x00000fff, int_elements,
+ GLINT_ITR(2, 1) - GLINT_ITR(2, 0)},
+ {GLINT_CTL, 0xffff0001, 1, 0}
+ };
+ unsigned int i;
+
+ netdev_dbg(netdev, "Register test\n");
+ for (i = 0; i < ARRAY_SIZE(ice_reg_list); ++i) {
+ u32 j;
+
+ for (j = 0; j < ice_reg_list[i].elem_num; ++j) {
+ u32 mask = ice_reg_list[i].mask;
+ u32 reg = ice_reg_list[i].address +
+ (j * ice_reg_list[i].elem_size);
+
+ /* bail on failure (non-zero return) */
+ if (ice_reg_pattern_test(hw, reg, mask))
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_lbtest_prepare_rings - configure Tx/Rx test rings
+ * @vsi: pointer to the VSI structure
+ *
+ * Function configures rings of a VSI for loopback test without
+ * enabling interrupts or informing the kernel about new queues.
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_prepare_rings(struct ice_vsi *vsi)
+{
+ int status;
+
+ status = ice_vsi_setup_tx_rings(vsi);
+ if (status)
+ goto err_setup_tx_ring;
+
+ status = ice_vsi_setup_rx_rings(vsi);
+ if (status)
+ goto err_setup_rx_ring;
+
+ status = ice_vsi_cfg(vsi);
+ if (status)
+ goto err_setup_rx_ring;
+
+ status = ice_vsi_start_all_rx_rings(vsi);
+ if (status)
+ goto err_start_rx_ring;
+
+ return status;
+
+err_start_rx_ring:
+ ice_vsi_free_rx_rings(vsi);
+err_setup_rx_ring:
+ ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+err_setup_tx_ring:
+ ice_vsi_free_tx_rings(vsi);
+
+ return status;
+}
+
+/**
+ * ice_lbtest_disable_rings - disable Tx/Rx test rings after loopback test
+ * @vsi: pointer to the VSI structure
+ *
+ * Function stops and frees VSI rings after a loopback test.
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_disable_rings(struct ice_vsi *vsi)
+{
+ int status;
+
+ status = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+ if (status)
+ netdev_err(vsi->netdev, "Failed to stop Tx rings, VSI %d error %d\n",
+ vsi->vsi_num, status);
+
+ status = ice_vsi_stop_all_rx_rings(vsi);
+ if (status)
+ netdev_err(vsi->netdev, "Failed to stop Rx rings, VSI %d error %d\n",
+ vsi->vsi_num, status);
+
+ ice_vsi_free_tx_rings(vsi);
+ ice_vsi_free_rx_rings(vsi);
+
+ return status;
+}
+
+/**
+ * ice_lbtest_create_frame - create test packet
+ * @pf: pointer to the PF structure
+ * @ret_data: allocated frame buffer
+ * @size: size of the packet data
+ *
+ * Function allocates a frame with a test pattern at specific offsets.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size)
+{
+ u8 *data;
+
+ if (!pf)
+ return -EINVAL;
+
+ data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* Since the ethernet test frame should always be at least
+ * 64 bytes long, fill some octets in the payload with test data.
+ */
+ memset(data, 0xFF, size);
+ data[32] = 0xDE;
+ data[42] = 0xAD;
+ data[44] = 0xBE;
+ data[46] = 0xEF;
+
+ *ret_data = data;
+
+ return 0;
+}
+
+/**
+ * ice_lbtest_check_frame - verify received loopback frame
+ * @frame: pointer to the raw packet data
+ *
+ * Function verifies that a received test frame matches the expected pattern.
+ * Returns true if frame matches the pattern, false otherwise.
+ */
+static bool ice_lbtest_check_frame(u8 *frame)
+{
+ /* Validate the marker bytes written by ice_lbtest_create_frame();
+ * byte 48 must still carry the 0xFF fill pattern
+ */
+ if (frame[32] == 0xDE &&
+ frame[42] == 0xAD &&
+ frame[44] == 0xBE &&
+ frame[46] == 0xEF &&
+ frame[48] == 0xFF)
+ return true;
+
+ return false;
+}
+
+/**
+ * ice_diag_send - send test frames to the test ring
+ * @tx_ring: pointer to the transmit ring
+ * @data: pointer to the raw packet data
+ * @size: size of the packet to send
+ *
+ * Function sends loopback packets on a test Tx ring.
+ */
+static int ice_diag_send(struct ice_tx_ring *tx_ring, u8 *data, u16 size)
+{
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ dma_addr_t dma;
+ u64 td_cmd;
+
+ tx_desc = ICE_TX_DESC(tx_ring, tx_ring->next_to_use);
+ tx_buf = &tx_ring->tx_buf[tx_ring->next_to_use];
+
+ dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(tx_ring->dev, dma))
+ return -EINVAL;
+
+ tx_desc->buf_addr = cpu_to_le64(dma);
+
+ /* These flags are required for a descriptor to be pushed out */
+ td_cmd = (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
+ tx_desc->cmd_type_offset_bsz =
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+ (td_cmd << ICE_TXD_QW1_CMD_S) |
+ ((u64)0 << ICE_TXD_QW1_OFFSET_S) |
+ ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+ ((u64)0 << ICE_TXD_QW1_L2TAG1_S));
+
+ tx_buf->next_to_watch = tx_desc;
+
+ /* Force memory write to complete before letting h/w know
+ * there are new descriptors to fetch.
+ */
+ wmb();
+
+ tx_ring->next_to_use++;
+ if (tx_ring->next_to_use >= tx_ring->count)
+ tx_ring->next_to_use = 0;
+
+ writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
+
+ /* Give the hardware time to loop the packet back to the receive queue. */
+ usleep_range(1000, 2000);
+ dma_unmap_single(tx_ring->dev, dma, size, DMA_TO_DEVICE);
+
+ return 0;
+}
+
+#define ICE_LB_FRAME_SIZE 64
+/**
+ * ice_lbtest_receive_frames - receive and verify test frames
+ * @rx_ring: pointer to the receive ring
+ *
+ * Function receives loopback packets and verifies their correctness.
+ * Returns number of received valid frames.
+ */
+static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring)
+{
+ struct ice_rx_buf *rx_buf;
+ int valid_frames, i;
+ u8 *received_buf;
+
+ valid_frames = 0;
+
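+ /* Skip descriptors that hardware has not written back, then verify
+ * the payload of every completed frame.
+ */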
+ for (i = 0; i < rx_ring->count; i++) {
+ union ice_32b_rx_flex_desc *rx_desc;
+
+ rx_desc = ICE_RX_DESC(rx_ring, i);
+
+ if (!(rx_desc->wb.status_error0 &
+ (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
+ cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
+ continue;
+
+ rx_buf = &rx_ring->rx_buf[i];
+ received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
+
+ if (ice_lbtest_check_frame(received_buf))
+ valid_frames++;
+ }
+
+ return valid_frames;
+}
+
+/**
+ * ice_loopback_test - perform a loopback test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_loopback_test(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
+ struct ice_pf *pf = orig_vsi->back;
+ u8 broadcast[ETH_ALEN], ret = 0;
+ int num_frames, valid_frames;
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+ struct device *dev;
+ u8 *tx_frame;
+ int i;
+
+ dev = ice_pf_to_dev(pf);
+ netdev_info(netdev, "loopback test\n");
+
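+ /* Run the test on a dedicated loopback VSI so the rings of the VSI in
+ * active use are never disturbed; each failure path below returns a
+ * distinct non-zero code identifying the failing stage.
+ */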
+ test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
+ if (!test_vsi) {
+ netdev_err(netdev, "Failed to create a VSI for the loopback test\n");
+ return 1;
+ }
+
+ test_vsi->netdev = netdev;
+ tx_ring = test_vsi->tx_rings[0];
+ rx_ring = test_vsi->rx_rings[0];
+
+ if (ice_lbtest_prepare_rings(test_vsi)) {
+ ret = 2;
+ goto lbtest_vsi_close;
+ }
+
+ if (ice_alloc_rx_bufs(rx_ring, rx_ring->count)) {
+ ret = 3;
+ goto lbtest_rings_dis;
+ }
+
+ /* Enable MAC loopback in firmware */
+ if (ice_aq_set_mac_loopback(&pf->hw, true, NULL)) {
+ ret = 4;
+ goto lbtest_mac_dis;
+ }
+
+ /* Test VSI needs to receive broadcast packets */
+ eth_broadcast_addr(broadcast);
+ if (ice_fltr_add_mac(test_vsi, broadcast, ICE_FWD_TO_VSI)) {
+ ret = 5;
+ goto lbtest_mac_dis;
+ }
+
+ if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) {
+ ret = 7;
+ goto remove_mac_filters;
+ }
+
+ num_frames = min_t(int, tx_ring->count, 32);
+ for (i = 0; i < num_frames; i++) {
+ if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) {
+ ret = 8;
+ goto lbtest_free_frame;
+ }
+ }
+
+ valid_frames = ice_lbtest_receive_frames(rx_ring);
+ if (!valid_frames)
+ ret = 9;
+ else if (valid_frames != num_frames)
+ ret = 10;
+
+lbtest_free_frame:
+ devm_kfree(dev, tx_frame);
+remove_mac_filters:
+ if (ice_fltr_remove_mac(test_vsi, broadcast, ICE_FWD_TO_VSI))
+ netdev_err(netdev, "Could not remove MAC filter for the test VSI\n");
+lbtest_mac_dis:
+ /* Disable MAC loopback after the test is completed. */
+ if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
+ netdev_err(netdev, "Could not disable MAC loopback\n");
+lbtest_rings_dis:
+ if (ice_lbtest_disable_rings(test_vsi))
+ netdev_err(netdev, "Could not disable test rings\n");
+lbtest_vsi_close:
+ test_vsi->netdev = NULL;
+ if (ice_vsi_release(test_vsi))
+ netdev_err(netdev, "Failed to remove the test VSI\n");
+
+ return ret;
+}
+
+/**
+ * ice_intr_test - perform an interrupt test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_intr_test(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+ u16 swic_old = pf->sw_int_count;
+
+ netdev_info(netdev, "interrupt test\n");
+
+ wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx),
+ GLINT_DYN_CTL_SW_ITR_INDX_M |
+ GLINT_DYN_CTL_INTENA_MSK_M |
+ GLINT_DYN_CTL_SWINT_TRIG_M);
+
+ usleep_range(1000, 2000);
+ return (swic_old == pf->sw_int_count);
+}
+
+/**
+ * ice_self_test - handler function for performing a self-test by ethtool
+ * @netdev: network interface device structure
+ * @eth_test: ethtool_test structure
+ * @data: required by ethtool.self_test
+ *
+ * This function is called after invoking 'ethtool -t devname' command where
+ * devname is the name of the network device on which ethtool should operate.
+ * It performs a set of self-tests to check if a device works properly.
+ */
+static void
+ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
+ u64 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ bool if_running = netif_running(netdev);
+ struct ice_pf *pf = np->vsi->back;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
+
+ if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+ netdev_info(netdev, "offline testing starting\n");
+
+ set_bit(ICE_TESTING, pf->state);
+
+ if (ice_active_vfs(pf)) {
+ dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+ data[ICE_ETH_TEST_REG] = 1;
+ data[ICE_ETH_TEST_EEPROM] = 1;
+ data[ICE_ETH_TEST_INTR] = 1;
+ data[ICE_ETH_TEST_LOOP] = 1;
+ data[ICE_ETH_TEST_LINK] = 1;
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ clear_bit(ICE_TESTING, pf->state);
+ goto skip_ol_tests;
+ }
+ /* If the device is online then take it offline */
+ if (if_running)
+ /* indicate we're in test mode */
+ ice_stop(netdev);
+
+ data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+ data[ICE_ETH_TEST_EEPROM] = ice_eeprom_test(netdev);
+ data[ICE_ETH_TEST_INTR] = ice_intr_test(netdev);
+ data[ICE_ETH_TEST_LOOP] = ice_loopback_test(netdev);
+ data[ICE_ETH_TEST_REG] = ice_reg_test(netdev);
+
+ if (data[ICE_ETH_TEST_LINK] ||
+ data[ICE_ETH_TEST_EEPROM] ||
+ data[ICE_ETH_TEST_LOOP] ||
+ data[ICE_ETH_TEST_INTR] ||
+ data[ICE_ETH_TEST_REG])
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ clear_bit(ICE_TESTING, pf->state);
+
+ if (if_running) {
+ int status = ice_open(netdev);
+
+ if (status) {
+ dev_err(dev, "Could not open device %s, err %d\n",
+ pf->int_name, status);
+ }
+ }
+ } else {
+ /* Online tests */
+ netdev_info(netdev, "online testing starting\n");
+
+ data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+ if (data[ICE_ETH_TEST_LINK])
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ /* Offline only tests, not run in online; pass by default */
+ data[ICE_ETH_TEST_REG] = 0;
+ data[ICE_ETH_TEST_EEPROM] = 0;
+ data[ICE_ETH_TEST_INTR] = 0;
+ data[ICE_ETH_TEST_LOOP] = 0;
+ }
+
+skip_ol_tests:
+ netdev_info(netdev, "testing finished\n");
+}
+
+static void
+__ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data,
+ struct ice_vsi *vsi)
+{
+ unsigned int i;
+ u8 *p = data;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < ICE_VSI_STATS_LEN; i++)
+ ethtool_sprintf(&p,
+ ice_gstrings_vsi_stats[i].stat_string);
+
+ if (ice_is_port_repr_netdev(netdev))
+ return;
+
+ ice_for_each_alloc_txq(vsi, i) {
+ ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+ ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+ }
+
+ ice_for_each_alloc_rxq(vsi, i) {
+ ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+ ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+ }
+
+ if (vsi->type != ICE_VSI_PF)
+ return;
+
+ for (i = 0; i < ICE_PF_STATS_LEN; i++)
+ ethtool_sprintf(&p,
+ ice_gstrings_pf_stats[i].stat_string);
+
+ for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+ ethtool_sprintf(&p, "tx_priority_%u_xon.nic", i);
+ ethtool_sprintf(&p, "tx_priority_%u_xoff.nic", i);
+ }
+ for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+ ethtool_sprintf(&p, "rx_priority_%u_xon.nic", i);
+ ethtool_sprintf(&p, "rx_priority_%u_xoff.nic", i);
+ }
+ break;
+ case ETH_SS_TEST:
+ memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
+ break;
+ case ETH_SS_PRIV_FLAGS:
+ for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++)
+ ethtool_sprintf(&p, ice_gstrings_priv_flags[i].name);
+ break;
+ default:
+ break;
+ }
+}
+
+static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ __ice_get_strings(netdev, stringset, data, np->vsi);
+}
+
+static int
+ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ bool led_active;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+ led_active = true;
+ break;
+ case ETHTOOL_ID_INACTIVE:
+ led_active = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ice_aq_set_port_id_led(np->vsi->port_info, !led_active, NULL))
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * ice_set_fec_cfg - Set link FEC options
+ * @netdev: network interface device structure
+ * @req_fec: FEC mode to configure
+ */
+static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_aqc_set_phy_cfg_data config = { 0 };
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_port_info *pi;
+
+ pi = vsi->port_info;
+ if (!pi)
+ return -EOPNOTSUPP;
+
+ /* Changing the FEC parameters is not supported if not the PF VSI */
+ if (vsi->type != ICE_VSI_PF) {
+ netdev_info(netdev, "Changing FEC parameters only supported for PF VSI\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* Proceed only if requesting different FEC mode */
+ if (pi->phy.curr_user_fec_req == req_fec)
+ return 0;
+
+ /* Copy the current user PHY configuration. The current user PHY
+ * configuration is initialized during probe from PHY capabilities
+ * software mode, and updated on set PHY configuration.
+ */
+ memcpy(&config, &pi->phy.curr_user_phy_cfg, sizeof(config));
+
+ ice_cfg_phy_fec(pi, &config, req_fec);
+ config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+ if (ice_aq_set_phy_cfg(pi->hw, pi, &config, NULL))
+ return -EAGAIN;
+
+ /* Save requested FEC config */
+ pi->phy.curr_user_fec_req = req_fec;
+
+ return 0;
+}
+
+/**
+ * ice_set_fecparam - Set FEC link options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
+ */
+static int
+ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ enum ice_fec_mode fec;
+
+ switch (fecparam->fec) {
+ case ETHTOOL_FEC_AUTO:
+ fec = ICE_FEC_AUTO;
+ break;
+ case ETHTOOL_FEC_RS:
+ fec = ICE_FEC_RS;
+ break;
+ case ETHTOOL_FEC_BASER:
+ fec = ICE_FEC_BASER;
+ break;
+ case ETHTOOL_FEC_OFF:
+ case ETHTOOL_FEC_NONE:
+ fec = ICE_FEC_NONE;
+ break;
+ default:
+ dev_warn(ice_pf_to_dev(vsi->back), "Unsupported FEC mode: %d\n",
+ fecparam->fec);
+ return -EINVAL;
+ }
+
+ return ice_set_fec_cfg(netdev, fec);
+}
+
+/**
+ * ice_get_fecparam - Get link FEC options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
+ */
+static int
+ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_aqc_get_phy_caps_data *caps;
+ struct ice_link_status *link_info;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_port_info *pi;
+ int err;
+
+ pi = vsi->port_info;
+
+ if (!pi)
+ return -EOPNOTSUPP;
+ link_info = &pi->phy.link_info;
+
+ /* Set FEC mode based on negotiated link info */
+ switch (link_info->fec_info) {
+ case ICE_AQ_LINK_25G_KR_FEC_EN:
+ fecparam->active_fec = ETHTOOL_FEC_BASER;
+ break;
+ case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+ case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+ fecparam->active_fec = ETHTOOL_FEC_RS;
+ break;
+ default:
+ fecparam->active_fec = ETHTOOL_FEC_OFF;
+ break;
+ }
+
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+ if (!caps)
+ return -ENOMEM;
+
+ err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+ caps, NULL);
+ if (err)
+ goto done;
+
+ /* Set supported/configured FEC modes based on PHY capability */
+ if (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC)
+ fecparam->fec |= ETHTOOL_FEC_AUTO;
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+ fecparam->fec |= ETHTOOL_FEC_BASER;
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+ fecparam->fec |= ETHTOOL_FEC_RS;
+ if (caps->link_fec_options == 0)
+ fecparam->fec |= ETHTOOL_FEC_OFF;
+
+done:
+ kfree(caps);
+ return err;
+}
+
+/**
+ * ice_nway_reset - restart autonegotiation
+ * @netdev: network interface device structure
+ */
+static int ice_nway_reset(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ int err;
+
+ /* If VSI state is up, then restart autoneg with link up */
+ if (!test_bit(ICE_DOWN, vsi->back->state))
+ err = ice_set_link(vsi, true);
+ else
+ err = ice_set_link(vsi, false);
+
+ return err;
+}
+
+/**
+ * ice_get_priv_flags - report device private flags
+ * @netdev: network interface device structure
+ *
+ * The string set count and the string set itself must stay in sync with
+ * the flags reported here. Add a new string to the ice_gstrings_priv_flags
+ * array for each new flag.
+ *
+ * Returns a u32 bitmap of flags.
+ */
+static u32 ice_get_priv_flags(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ u32 i, ret_flags = 0;
+
+ for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
+ const struct ice_priv_flag *priv_flag;
+
+ priv_flag = &ice_gstrings_priv_flags[i];
+
+ if (test_bit(priv_flag->bitno, pf->flags))
+ ret_flags |= BIT(i);
+ }
+
+ return ret_flags;
+}
+
+/**
+ * ice_set_priv_flags - set private flags
+ * @netdev: network interface device structure
+ * @flags: bit flags to be set
+ */
+static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ DECLARE_BITMAP(change_flags, ICE_PF_FLAGS_NBITS);
+ DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ int ret = 0;
+ u32 i;
+
+ if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE))
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(pf);
+ set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
+
+ bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS);
+ for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
+ const struct ice_priv_flag *priv_flag;
+
+ priv_flag = &ice_gstrings_priv_flags[i];
+
+ if (flags & BIT(i))
+ set_bit(priv_flag->bitno, pf->flags);
+ else
+ clear_bit(priv_flag->bitno, pf->flags);
+ }
+
+ bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS);
+
+ /* Do not allow change to link-down-on-close when Total Port Shutdown
+ * is enabled.
+ */
+ if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, change_flags) &&
+ test_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) {
+ dev_err(dev, "Setting link-down-on-close not supported on this port\n");
+ set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
+ ret = -EINVAL;
+ goto ethtool_exit;
+ }
+
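+ /* Toggling fw-lldp-agent hands LLDP/DCBX control either to the host
+ * (agent stopped, SW DCB) or back to the firmware (agent started).
+ */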
+ if (test_bit(ICE_FLAG_FW_LLDP_AGENT, change_flags)) {
+ if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) {
+ int status;
+
+ /* Disable FW LLDP engine */
+ status = ice_cfg_lldp_mib_change(&pf->hw, false);
+
+ /* If unregistering for LLDP events fails, this is
+ * not an error state, as there shouldn't be any
+ * events to respond to.
+ */
+ if (status)
+ dev_info(dev, "Failed to unreg for LLDP events\n");
+
+ /* The AQ call to stop the FW LLDP agent will generate
+ * an error if the agent is already stopped.
+ */
+ status = ice_aq_stop_lldp(&pf->hw, true, true, NULL);
+ if (status)
+ dev_warn(dev, "Fail to stop LLDP agent\n");
+ /* Use case for having the FW LLDP agent stopped
+ * will likely not need DCB, so failure to init is
+ * not a concern of ethtool
+ */
+ status = ice_init_pf_dcb(pf, true);
+ if (status)
+ dev_warn(dev, "Fail to init DCB\n");
+
+ pf->dcbx_cap &= ~DCB_CAP_DCBX_LLD_MANAGED;
+ pf->dcbx_cap |= DCB_CAP_DCBX_HOST;
+ } else {
+ bool dcbx_agent_status;
+ int status;
+
+ if (ice_get_pfc_mode(pf) == ICE_QOS_MODE_DSCP) {
+ clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
+ dev_err(dev, "QoS in L3 DSCP mode, FW Agent not allowed to start\n");
+ ret = -EOPNOTSUPP;
+ goto ethtool_exit;
+ }
+
+ /* Remove rule to direct LLDP packets to default VSI.
+ * The FW LLDP engine will now be consuming them.
+ */
+ ice_cfg_sw_lldp(vsi, false, false);
+
+ /* AQ command to start FW LLDP agent will return an
+ * error if the agent is already started
+ */
+ status = ice_aq_start_lldp(&pf->hw, true, NULL);
+ if (status)
+ dev_warn(dev, "Fail to start LLDP Agent\n");
+
+ /* AQ command to start FW DCBX agent will fail if
+ * the agent is already started
+ */
+ status = ice_aq_start_stop_dcbx(&pf->hw, true,
+ &dcbx_agent_status,
+ NULL);
+ if (status)
+ dev_dbg(dev, "Failed to start FW DCBX\n");
+
+ dev_info(dev, "FW DCBX agent is %s\n",
+ dcbx_agent_status ? "ACTIVE" : "DISABLED");
+
+ /* Failure to configure MIB change or init DCB is not
+ * relevant to ethtool. Print notification that
+ * registration/init failed but do not return error
+ * state to ethtool
+ */
+ status = ice_init_pf_dcb(pf, true);
+ if (status)
+ dev_dbg(dev, "Fail to init DCB\n");
+
+ /* Register for MIB change events */
+ status = ice_cfg_lldp_mib_change(&pf->hw, true);
+ if (status)
+ dev_dbg(dev, "Fail to enable MIB change events\n");
+
+ pf->dcbx_cap &= ~DCB_CAP_DCBX_HOST;
+ pf->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
+
+ ice_nway_reset(netdev);
+ }
+ }
+ if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
+ /* down and up VSI so that changes of Rx cfg are reflected. */
+ ice_down_up(vsi);
+ }
+ /* don't allow modification of this flag when a single VF is in
+ * promiscuous mode because it's not supported
+ */
+ if (test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, change_flags) &&
+ ice_is_any_vf_in_unicast_promisc(pf)) {
+ dev_err(dev, "Changing vf-true-promisc-support flag while VF(s) are in promiscuous mode not supported\n");
+ /* toggle bit back to previous state */
+ change_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags);
+ ret = -EAGAIN;
+ }
+
+ if (test_bit(ICE_FLAG_VF_VLAN_PRUNING, change_flags) &&
+ ice_has_vfs(pf)) {
+ dev_err(dev, "vf-vlan-pruning: VLAN pruning cannot be changed while VFs are active.\n");
+ /* toggle bit back to previous state */
+ change_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags);
+ ret = -EOPNOTSUPP;
+ }
+ethtool_exit:
+ clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
+ return ret;
+}
+
+static int ice_get_sset_count(struct net_device *netdev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ /* The number (and order) of strings reported *must* remain
+ * constant for a given netdevice. This function must not
+ * report a different number based on run time parameters
+ * (such as the number of queues in use, or the setting of
+ * a private ethtool flag). This is due to the nature of the
+ * ethtool stats API.
+ *
+ * Userspace programs such as ethtool must make 3 separate
+ * ioctl requests, one for size, one for the strings, and
+ * finally one for the stats. Since these cross into
+ * userspace, changes to the number or size could result in
+ * undefined memory access or incorrect string<->value
+ * correlations for statistics.
+ *
+ * Even if it appears to be safe, changes to the size or
+ * order of strings will suffer from race conditions and are
+ * not safe.
+ */
+ return ICE_ALL_STATS_LEN(netdev);
+ case ETH_SS_TEST:
+ return ICE_TEST_LEN;
+ case ETH_SS_PRIV_FLAGS:
+ return ICE_PRIV_FLAG_ARRAY_SIZE;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void
+__ice_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats __always_unused *stats, u64 *data,
+ struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+ unsigned int j;
+ int i = 0;
+ char *p;
+
+ ice_update_pf_stats(pf);
+ ice_update_vsi_stats(vsi);
+
+ for (j = 0; j < ICE_VSI_STATS_LEN; j++) {
+ p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset;
+ data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+
+ if (ice_is_port_repr_netdev(netdev))
+ return;
+
+ /* Populate per-queue stats under RCU; rings may be freed and rebuilt
+ * during reconfiguration, so each ring pointer is read with READ_ONCE().
+ */
+ rcu_read_lock();
+
+ ice_for_each_alloc_txq(vsi, j) {
+ tx_ring = READ_ONCE(vsi->tx_rings[j]);
+ if (tx_ring) {
+ data[i++] = tx_ring->stats.pkts;
+ data[i++] = tx_ring->stats.bytes;
+ } else {
+ data[i++] = 0;
+ data[i++] = 0;
+ }
+ }
+
+ ice_for_each_alloc_rxq(vsi, j) {
+ rx_ring = READ_ONCE(vsi->rx_rings[j]);
+ if (rx_ring) {
+ data[i++] = rx_ring->stats.pkts;
+ data[i++] = rx_ring->stats.bytes;
+ } else {
+ data[i++] = 0;
+ data[i++] = 0;
+ }
+ }
+
+ rcu_read_unlock();
+
+ if (vsi->type != ICE_VSI_PF)
+ return;
+
+ for (j = 0; j < ICE_PF_STATS_LEN; j++) {
+ p = (char *)pf + ice_gstrings_pf_stats[j].stat_offset;
+ data[i++] = (ice_gstrings_pf_stats[j].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+
+ for (j = 0; j < ICE_MAX_USER_PRIORITY; j++) {
+ data[i++] = pf->stats.priority_xon_tx[j];
+ data[i++] = pf->stats.priority_xoff_tx[j];
+ }
+
+ for (j = 0; j < ICE_MAX_USER_PRIORITY; j++) {
+ data[i++] = pf->stats.priority_xon_rx[j];
+ data[i++] = pf->stats.priority_xoff_rx[j];
+ }
+}
+
+static void
+ice_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats __always_unused *stats, u64 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ __ice_get_ethtool_stats(netdev, stats, data, np->vsi);
+}
+
+#define ICE_PHY_TYPE_LOW_MASK_MIN_1G (ICE_PHY_TYPE_LOW_100BASE_TX | \
+ ICE_PHY_TYPE_LOW_100M_SGMII)
+
+#define ICE_PHY_TYPE_LOW_MASK_MIN_25G (ICE_PHY_TYPE_LOW_MASK_MIN_1G | \
+ ICE_PHY_TYPE_LOW_1000BASE_T | \
+ ICE_PHY_TYPE_LOW_1000BASE_SX | \
+ ICE_PHY_TYPE_LOW_1000BASE_LX | \
+ ICE_PHY_TYPE_LOW_1000BASE_KX | \
+ ICE_PHY_TYPE_LOW_1G_SGMII | \
+ ICE_PHY_TYPE_LOW_2500BASE_T | \
+ ICE_PHY_TYPE_LOW_2500BASE_X | \
+ ICE_PHY_TYPE_LOW_2500BASE_KX | \
+ ICE_PHY_TYPE_LOW_5GBASE_T | \
+ ICE_PHY_TYPE_LOW_5GBASE_KR | \
+ ICE_PHY_TYPE_LOW_10GBASE_T | \
+ ICE_PHY_TYPE_LOW_10G_SFI_DA | \
+ ICE_PHY_TYPE_LOW_10GBASE_SR | \
+ ICE_PHY_TYPE_LOW_10GBASE_LR | \
+ ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 | \
+ ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | \
+ ICE_PHY_TYPE_LOW_10G_SFI_C2C)
+
+#define ICE_PHY_TYPE_LOW_MASK_100G (ICE_PHY_TYPE_LOW_100GBASE_CR4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_SR4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_LR4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_KR4 | \
+ ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \
+ ICE_PHY_TYPE_LOW_100G_CAUI4 | \
+ ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \
+ ICE_PHY_TYPE_LOW_100G_AUI4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_CP2 | \
+ ICE_PHY_TYPE_LOW_100GBASE_SR2 | \
+ ICE_PHY_TYPE_LOW_100GBASE_DR)
+
+#define ICE_PHY_TYPE_HIGH_MASK_100G (ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \
+ ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC |\
+ ICE_PHY_TYPE_HIGH_100G_CAUI2 | \
+ ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \
+ ICE_PHY_TYPE_HIGH_100G_AUI2)
+
+/**
+ * ice_mask_min_supported_speeds
+ * @hw: pointer to the HW structure
+ * @phy_types_high: PHY type high
+ * @phy_types_low: PHY type low to apply minimum supported speeds mask
+ *
+ * Apply the minimum supported speeds mask to PHY type low. These are the
+ * speeds for the ethtool supported link modes.
+ */
+static void
+ice_mask_min_supported_speeds(struct ice_hw *hw,
+ u64 phy_types_high, u64 *phy_types_low)
+{
+ /* if QSFP connection with 100G speed, minimum supported speed is 25G */
+ if (*phy_types_low & ICE_PHY_TYPE_LOW_MASK_100G ||
+ phy_types_high & ICE_PHY_TYPE_HIGH_MASK_100G)
+ *phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_25G;
+ else if (!ice_is_100m_speed_supported(hw))
+ *phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_1G;
+}
+
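+/* Advertise a link mode when the user requested its AQ speed, or when no
+ * speed was requested and the PHY types backing the mode are present.
+ */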
+#define ice_ethtool_advertise_link_mode(aq_link_speed, ethtool_link_mode) \
+ do { \
+ if (req_speeds & (aq_link_speed) || \
+ (!req_speeds && \
+ (advert_phy_type_lo & phy_type_mask_lo || \
+ advert_phy_type_hi & phy_type_mask_hi))) \
+ ethtool_link_ksettings_add_link_mode(ks, advertising,\
+ ethtool_link_mode); \
+ } while (0)
+
+/**
+ * ice_phy_type_to_ethtool - convert the phy_types to ethtool link modes
+ * @netdev: network interface device structure
+ * @ks: ethtool link ksettings struct to fill out
+ */
+static void
+ice_phy_type_to_ethtool(struct net_device *netdev,
+ struct ethtool_link_ksettings *ks)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ u64 advert_phy_type_lo = 0;
+ u64 advert_phy_type_hi = 0;
+ u64 phy_type_mask_lo = 0;
+ u64 phy_type_mask_hi = 0;
+ u64 phy_types_high = 0;
+ u64 phy_types_low = 0;
+ u16 req_speeds;
+
+ req_speeds = vsi->port_info->phy.link_info.req_speeds;
+
+ /* Check if lenient mode is supported and enabled, or in strict mode.
+ *
+ * In lenient mode the Supported link modes are the PHY types without
+ * media. The Advertising link mode is either 1. the user requested
+ * speed, 2. the override PHY mask, or 3. the PHY types with media.
+ *
+ * In strict mode Supported link mode are the PHY type with media,
+ * and Advertising link modes are the media PHY type or the speed
+ * requested by user.
+ */
+ if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) {
+ phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo);
+ phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi);
+
+ ice_mask_min_supported_speeds(&pf->hw, phy_types_high,
+ &phy_types_low);
+ /* determine advertised modes based on link override only
+ * if it's supported and if the FW doesn't abstract the
+ * driver from having to account for link overrides
+ */
+ if (ice_fw_supports_link_override(&pf->hw) &&
+ !ice_fw_supports_report_dflt_cfg(&pf->hw)) {
+ struct ice_link_default_override_tlv *ldo;
+
+ ldo = &pf->link_dflt_override;
+ /* If override enabled and PHY mask set, then
+ * Advertising link mode is the intersection of the PHY
+ * types without media and the override PHY mask.
+ */
+ if (ldo->options & ICE_LINK_OVERRIDE_EN &&
+ (ldo->phy_type_low || ldo->phy_type_high)) {
+ advert_phy_type_lo =
+ le64_to_cpu(pf->nvm_phy_type_lo) &
+ ldo->phy_type_low;
+ advert_phy_type_hi =
+ le64_to_cpu(pf->nvm_phy_type_hi) &
+ ldo->phy_type_high;
+ }
+ }
+ } else {
+ /* strict mode */
+ phy_types_low = vsi->port_info->phy.phy_type_low;
+ phy_types_high = vsi->port_info->phy.phy_type_high;
+ }
+
+ /* If Advertising link mode PHY type is not using override PHY type,
+ * then use PHY type with media.
+ */
+ if (!advert_phy_type_lo && !advert_phy_type_hi) {
+ advert_phy_type_lo = vsi->port_info->phy.phy_type_low;
+ advert_phy_type_hi = vsi->port_info->phy.phy_type_high;
+ }
+
+ ethtool_link_ksettings_zero_link_mode(ks, supported);
+ ethtool_link_ksettings_zero_link_mode(ks, advertising);
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_100BASE_TX |
+ ICE_PHY_TYPE_LOW_100M_SGMII;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100baseT_Full);
+
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100MB,
+ 100baseT_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_T |
+ ICE_PHY_TYPE_LOW_1G_SGMII;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
+ 1000baseT_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_KX;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseKX_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
+ 1000baseKX_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_SX |
+ ICE_PHY_TYPE_LOW_1000BASE_LX;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseX_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
+ 1000baseX_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_T;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 2500baseT_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB,
+ 2500baseT_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_X |
+ ICE_PHY_TYPE_LOW_2500BASE_KX;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 2500baseX_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB,
+ 2500baseX_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_5GBASE_T |
+ ICE_PHY_TYPE_LOW_5GBASE_KR;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 5000baseT_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_5GB,
+ 5000baseT_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_T |
+ ICE_PHY_TYPE_LOW_10G_SFI_DA |
+ ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC |
+ ICE_PHY_TYPE_LOW_10G_SFI_C2C;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+ 10000baseT_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_KR_CR1;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseKR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+ 10000baseKR_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_SR;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseSR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+ 10000baseSR_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_LR;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseLR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+ 10000baseLR_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_T |
+ ICE_PHY_TYPE_LOW_25GBASE_CR |
+ ICE_PHY_TYPE_LOW_25GBASE_CR_S |
+ ICE_PHY_TYPE_LOW_25GBASE_CR1 |
+ ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC |
+ ICE_PHY_TYPE_LOW_25G_AUI_C2C;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseCR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
+ 25000baseCR_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_SR |
+ ICE_PHY_TYPE_LOW_25GBASE_LR;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseSR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
+ 25000baseSR_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_KR |
+ ICE_PHY_TYPE_LOW_25GBASE_KR_S |
+ ICE_PHY_TYPE_LOW_25GBASE_KR1;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseKR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
+ 25000baseKR_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_KR4;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseKR4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+ 40000baseKR4_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_CR4 |
+ ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC |
+ ICE_PHY_TYPE_LOW_40G_XLAUI;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseCR4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+ 40000baseCR4_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_SR4;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseSR4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+ 40000baseSR4_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_LR4;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseLR4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+ 40000baseLR4_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_CR2 |
+ ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC |
+ ICE_PHY_TYPE_LOW_50G_LAUI2 |
+ ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC |
+ ICE_PHY_TYPE_LOW_50G_AUI2 |
+ ICE_PHY_TYPE_LOW_50GBASE_CP |
+ ICE_PHY_TYPE_LOW_50GBASE_SR |
+ ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC |
+ ICE_PHY_TYPE_LOW_50G_AUI1;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 50000baseCR2_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
+ 50000baseCR2_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_KR2 |
+ ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 50000baseKR2_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
+ 50000baseKR2_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_SR2 |
+ ICE_PHY_TYPE_LOW_50GBASE_LR2 |
+ ICE_PHY_TYPE_LOW_50GBASE_FR |
+ ICE_PHY_TYPE_LOW_50GBASE_LR;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 50000baseSR2_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
+ 50000baseSR2_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_CR4 |
+ ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC |
+ ICE_PHY_TYPE_LOW_100G_CAUI4 |
+ ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC |
+ ICE_PHY_TYPE_LOW_100G_AUI4 |
+ ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 |
+ ICE_PHY_TYPE_LOW_100GBASE_CP2;
+ phy_type_mask_hi = ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC |
+ ICE_PHY_TYPE_HIGH_100G_CAUI2 |
+ ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC |
+ ICE_PHY_TYPE_HIGH_100G_AUI2;
+ if (phy_types_low & phy_type_mask_lo ||
+ phy_types_high & phy_type_mask_hi) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100000baseCR4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+ 100000baseCR4_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_SR4 |
+ ICE_PHY_TYPE_LOW_100GBASE_SR2;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100000baseSR4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+ 100000baseSR4_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_LR4 |
+ ICE_PHY_TYPE_LOW_100GBASE_DR;
+ if (phy_types_low & phy_type_mask_lo) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100000baseLR4_ER4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+ 100000baseLR4_ER4_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_KR4 |
+ ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4;
+ phy_type_mask_hi = ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4;
+ if (phy_types_low & phy_type_mask_lo ||
+ phy_types_high & phy_type_mask_hi) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100000baseKR4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+ 100000baseKR4_Full);
+ }
+}
+
+#define TEST_SET_BITS_TIMEOUT 50
+#define TEST_SET_BITS_SLEEP_MAX 2000
+#define TEST_SET_BITS_SLEEP_MIN 1000
+
+/**
+ * ice_get_settings_link_up - Get Link settings for when link is up
+ * @ks: ethtool ksettings to fill in
+ * @netdev: network interface device structure
+ */
+static void
+ice_get_settings_link_up(struct ethtool_link_ksettings *ks,
+ struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_port_info *pi = np->vsi->port_info;
+ struct ice_link_status *link_info;
+ struct ice_vsi *vsi = np->vsi;
+
+ link_info = &vsi->port_info->phy.link_info;
+
+ /* Get supported and advertised settings from PHY ability with media */
+ ice_phy_type_to_ethtool(netdev, ks);
+
+ switch (link_info->link_speed) {
+ case ICE_AQ_LINK_SPEED_100GB:
+ ks->base.speed = SPEED_100000;
+ break;
+ case ICE_AQ_LINK_SPEED_50GB:
+ ks->base.speed = SPEED_50000;
+ break;
+ case ICE_AQ_LINK_SPEED_40GB:
+ ks->base.speed = SPEED_40000;
+ break;
+ case ICE_AQ_LINK_SPEED_25GB:
+ ks->base.speed = SPEED_25000;
+ break;
+ case ICE_AQ_LINK_SPEED_20GB:
+ ks->base.speed = SPEED_20000;
+ break;
+ case ICE_AQ_LINK_SPEED_10GB:
+ ks->base.speed = SPEED_10000;
+ break;
+ case ICE_AQ_LINK_SPEED_5GB:
+ ks->base.speed = SPEED_5000;
+ break;
+ case ICE_AQ_LINK_SPEED_2500MB:
+ ks->base.speed = SPEED_2500;
+ break;
+ case ICE_AQ_LINK_SPEED_1000MB:
+ ks->base.speed = SPEED_1000;
+ break;
+ case ICE_AQ_LINK_SPEED_100MB:
+ ks->base.speed = SPEED_100;
+ break;
+ default:
+ netdev_info(netdev, "WARNING: Unrecognized link_speed (0x%x).\n",
+ link_info->link_speed);
+ break;
+ }
+ ks->base.duplex = DUPLEX_FULL;
+
+ if (link_info->an_info & ICE_AQ_AN_COMPLETED)
+ ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+ Autoneg);
+
+ /* Set flow control negotiated Rx/Tx pause */
+ switch (pi->fc.current_mode) {
+ case ICE_FC_FULL:
+ ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause);
+ break;
+ case ICE_FC_TX_PAUSE:
+ ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause);
+ ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+ Asym_Pause);
+ break;
+ case ICE_FC_RX_PAUSE:
+ ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+ Asym_Pause);
+ break;
+ case ICE_FC_PFC:
+ default:
+ ethtool_link_ksettings_del_link_mode(ks, lp_advertising, Pause);
+ ethtool_link_ksettings_del_link_mode(ks, lp_advertising,
+ Asym_Pause);
+ break;
+ }
+}
+
+/**
+ * ice_get_settings_link_down - Get the Link settings when link is down
+ * @ks: ethtool ksettings to fill in
+ * @netdev: network interface device structure
+ *
+ * Reports link settings that can be determined when link is down
+ */
+static void
+ice_get_settings_link_down(struct ethtool_link_ksettings *ks,
+ struct net_device *netdev)
+{
+ /* link is down and the driver needs to fall back on
+ * supported PHY types to figure out what info to display
+ */
+ ice_phy_type_to_ethtool(netdev, ks);
+
+ /* With no link, speed and duplex are unknown */
+ ks->base.speed = SPEED_UNKNOWN;
+ ks->base.duplex = DUPLEX_UNKNOWN;
+}
+
+/**
+ * ice_get_link_ksettings - Get Link Speed and Duplex settings
+ * @netdev: network interface device structure
+ * @ks: ethtool ksettings
+ *
+ * Reports speed/duplex settings based on media_type
+ */
+static int
+ice_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *ks)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_aqc_get_phy_caps_data *caps;
+ struct ice_link_status *hw_link_info;
+ struct ice_vsi *vsi = np->vsi;
+ int err;
+
+ ethtool_link_ksettings_zero_link_mode(ks, supported);
+ ethtool_link_ksettings_zero_link_mode(ks, advertising);
+ ethtool_link_ksettings_zero_link_mode(ks, lp_advertising);
+ hw_link_info = &vsi->port_info->phy.link_info;
+
+ /* set speed and duplex */
+ if (hw_link_info->link_info & ICE_AQ_LINK_UP)
+ ice_get_settings_link_up(ks, netdev);
+ else
+ ice_get_settings_link_down(ks, netdev);
+
+ /* set autoneg settings */
+ ks->base.autoneg = (hw_link_info->an_info & ICE_AQ_AN_COMPLETED) ?
+ AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+ /* set media type settings */
+ switch (vsi->port_info->phy.media_type) {
+ case ICE_MEDIA_FIBER:
+ ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+ ks->base.port = PORT_FIBRE;
+ break;
+ case ICE_MEDIA_BASET:
+ ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+ ks->base.port = PORT_TP;
+ break;
+ case ICE_MEDIA_BACKPLANE:
+ ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ Backplane);
+ ks->base.port = PORT_NONE;
+ break;
+ case ICE_MEDIA_DA:
+ ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+ ks->base.port = PORT_DA;
+ break;
+ default:
+ ks->base.port = PORT_OTHER;
+ break;
+ }
+
+ /* flow control is symmetric and always supported */
+ ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
+
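+ /* Query the PHY twice: the active configuration drives the advertised
+ * pause and FEC modes, while topology caps (with media) drive the
+ * supported FEC modes and autoneg below.
+ */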
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+ if (!caps)
+ return -ENOMEM;
+
+ err = ice_aq_get_phy_caps(vsi->port_info, false,
+ ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
+ if (err)
+ goto done;
+
+ /* Set the advertised flow control based on the PHY capability */
+ if ((caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
+ (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)) {
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ Asym_Pause);
+ } else if (caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) {
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ Asym_Pause);
+ } else if (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE) {
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ Asym_Pause);
+ } else {
+ ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+ ethtool_link_ksettings_del_link_mode(ks, advertising,
+ Asym_Pause);
+ }
+
+ /* Set advertised FEC modes based on PHY capability */
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE);
+
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ FEC_BASER);
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+
+ err = ice_aq_get_phy_caps(vsi->port_info, false,
+ ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL);
+ if (err)
+ goto done;
+
+ /* Set supported FEC modes based on PHY capability */
+ ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
+
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN)
+ ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+ ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+
+ /* Set supported and advertised autoneg */
+ if (ice_is_phy_caps_an_enabled(caps)) {
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ }
+
+done:
+ kfree(caps);
+ return err;
+}
+
+/**
+ * ice_ksettings_find_adv_link_speed - Find advertising link speed
+ * @ks: ethtool ksettings
+ */
+static u16
+ice_ksettings_find_adv_link_speed(const struct ethtool_link_ksettings *ks)
+{
+ u16 adv_link_speed = 0;
+
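+ /* Translate each advertised ethtool link mode into the matching
+ * admin queue link speed bit.
+ */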
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 100baseT_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_100MB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseX_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_1000MB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseT_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseKX_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_1000MB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 2500baseT_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_2500MB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 2500baseX_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_2500MB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 5000baseT_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_5GB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseT_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseKR_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_10GB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseSR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseLR_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_10GB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 25000baseCR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 25000baseSR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 25000baseKR_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_25GB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseCR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseSR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseLR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseKR4_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_40GB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 50000baseCR2_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 50000baseKR2_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 50000baseSR2_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 100000baseCR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 100000baseSR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 100000baseLR4_ER4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 100000baseKR4_Full))
+ adv_link_speed |= ICE_AQ_LINK_SPEED_100GB;
+
+ return adv_link_speed;
+}
+
+/**
+ * ice_setup_autoneg
+ * @p: port info
+ * @ks: ethtool_link_ksettings
+ * @config: configuration that will be sent down to FW
+ * @autoneg_enabled: autonegotiation is enabled or not
+ * @autoneg_changed: whether autonegotiation is going to be changed
+ * @netdev: network interface device structure
+ *
+ * Setup PHY autonegotiation feature
+ */
+static int
+ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks,
+ struct ice_aqc_set_phy_cfg_data *config,
+ u8 autoneg_enabled, u8 *autoneg_changed,
+ struct net_device *netdev)
+{
+ int err = 0;
+
+ *autoneg_changed = 0;
+
+ /* Check autoneg */
+ if (autoneg_enabled == AUTONEG_ENABLE) {
+ /* If autoneg was not already enabled */
+ if (!(p->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)) {
+ /* If autoneg is not supported, return error */
+ if (!ethtool_link_ksettings_test_link_mode(ks,
+ supported,
+ Autoneg)) {
+ netdev_info(netdev, "Autoneg not supported on this phy.\n");
+ err = -EINVAL;
+ } else {
+ /* Autoneg is allowed to change */
+ config->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+ *autoneg_changed = 1;
+ }
+ }
+ } else {
+ /* If autoneg is currently enabled */
+ if (p->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) {
+ /* A PHY that reports Autoneg support cannot disable it;
+ * 10GBASE_T is the only PHY allowed to turn autoneg off,
+ * so otherwise return an error
+ */
+ if (ethtool_link_ksettings_test_link_mode(ks,
+ supported,
+ Autoneg)) {
+ netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
+ err = -EINVAL;
+ } else {
+ /* Autoneg is allowed to change */
+ config->caps &= ~ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+ *autoneg_changed = 1;
+ }
+ }
+ }
+
+ return err;
+}
+
+/**
+ * ice_set_phy_type_from_speed - set phy_types based on speeds
+ * and advertised modes
+ * @ks: ethtool link ksettings struct
+ * @phy_type_low: pointer to the lower part of phy_type
+ * @phy_type_high: pointer to the higher part of phy_type
+ * @adv_link_speed: targeted link speeds bitmap
+ */
+static void
+ice_set_phy_type_from_speed(const struct ethtool_link_ksettings *ks,
+ u64 *phy_type_low, u64 *phy_type_high,
+ u16 adv_link_speed)
+{
+ /* Handle 1000M speed in a special way because ice_update_phy_type
+ * enables all link modes, but having mixed copper and optical
+ * standards is not supported.
+ */
+ adv_link_speed &= ~ICE_AQ_LINK_SPEED_1000MB;
+
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseT_Full))
+ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_T |
+ ICE_PHY_TYPE_LOW_1G_SGMII;
+
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseKX_Full))
+ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_KX;
+
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseX_Full))
+ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_SX |
+ ICE_PHY_TYPE_LOW_1000BASE_LX;
+
+ ice_update_phy_type(phy_type_low, phy_type_high, adv_link_speed);
+}
+
+/**
+ * ice_set_link_ksettings - Set Speed and Duplex
+ * @netdev: network interface device structure
+ * @ks: ethtool ksettings
+ *
+ * Set speed/duplex per media_types advertised/forced
+ */
+static int
+ice_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *ks)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT;
+ struct ethtool_link_ksettings copy_ks = *ks;
+ struct ethtool_link_ksettings safe_ks = {};
+ struct ice_aqc_get_phy_caps_data *phy_caps;
+ struct ice_aqc_set_phy_cfg_data config;
+ u16 adv_link_speed, curr_link_speed;
+ struct ice_pf *pf = np->vsi->back;
+ struct ice_port_info *pi;
+ u8 autoneg_changed = 0;
+ u64 phy_type_high = 0;
+ u64 phy_type_low = 0;
+ bool linkup;
+ int err;
+
+ pi = np->vsi->port_info;
+
+ if (!pi)
+ return -EIO;
+
+ if (pi->phy.media_type != ICE_MEDIA_BASET &&
+ pi->phy.media_type != ICE_MEDIA_FIBER &&
+ pi->phy.media_type != ICE_MEDIA_BACKPLANE &&
+ pi->phy.media_type != ICE_MEDIA_DA &&
+ pi->phy.link_info.link_info & ICE_AQ_LINK_UP)
+ return -EOPNOTSUPP;
+
+ phy_caps = kzalloc(sizeof(*phy_caps), GFP_KERNEL);
+ if (!phy_caps)
+ return -ENOMEM;
+
+ /* Get the PHY capabilities based on media */
+ if (ice_fw_supports_report_dflt_cfg(pi->hw))
+ err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
+ phy_caps, NULL);
+ else
+ err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+ phy_caps, NULL);
+ if (err)
+ goto done;
+
+ /* save autoneg out of ksettings */
+ autoneg = copy_ks.base.autoneg;
+
+	/* Get link modes supported by hardware. */
+ ice_phy_type_to_ethtool(netdev, &safe_ks);
+
+	/* and check them against the modes requested by the user.
+	 * Return an error if an unsupported mode was set.
+ */
+ if (!bitmap_subset(copy_ks.link_modes.advertising,
+ safe_ks.link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+ if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags))
+ netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+
+ /* get our own copy of the bits to check against */
+ memset(&safe_ks, 0, sizeof(safe_ks));
+ safe_ks.base.cmd = copy_ks.base.cmd;
+ safe_ks.base.link_mode_masks_nwords =
+ copy_ks.base.link_mode_masks_nwords;
+ ice_get_link_ksettings(netdev, &safe_ks);
+
+ /* set autoneg back to what it currently is */
+ copy_ks.base.autoneg = safe_ks.base.autoneg;
+ /* we don't compare the speed */
+ copy_ks.base.speed = safe_ks.base.speed;
+
+	/* If copy_ks.base and safe_ks.base are not the same now, then the
+	 * user is trying to set something that we do not support.
+ */
+ if (memcmp(&copy_ks.base, &safe_ks.base, sizeof(copy_ks.base))) {
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+
+ while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
+ timeout--;
+ if (!timeout) {
+ err = -EBUSY;
+ goto done;
+ }
+ usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX);
+ }
+
+ /* Copy the current user PHY configuration. The current user PHY
+ * configuration is initialized during probe from PHY capabilities
+ * software mode, and updated on set PHY configuration.
+ */
+ config = pi->phy.curr_user_phy_cfg;
+
+ config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+ /* Check autoneg */
+ err = ice_setup_autoneg(pi, &safe_ks, &config, autoneg, &autoneg_changed,
+ netdev);
+
+ if (err)
+ goto done;
+
+ /* Call to get the current link speed */
+ pi->phy.get_link_info = true;
+ err = ice_get_link_status(pi, &linkup);
+ if (err)
+ goto done;
+
+ curr_link_speed = pi->phy.curr_user_speed_req;
+ adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
+
+ /* If speed didn't get set, set it to what it currently is.
+ * This is needed because if advertise is 0 (as it is when autoneg
+ * is disabled) then speed won't get set.
+ */
+ if (!adv_link_speed)
+ adv_link_speed = curr_link_speed;
+
+	/* Convert the advertised link speeds to their corresponding PHY types */
+ ice_set_phy_type_from_speed(ks, &phy_type_low, &phy_type_high,
+ adv_link_speed);
+
+ if (!autoneg_changed && adv_link_speed == curr_link_speed) {
+ netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
+ goto done;
+ }
+
+ /* save the requested speeds */
+ pi->phy.link_info.req_speeds = adv_link_speed;
+
+ /* set link and auto negotiation so changes take effect */
+ config.caps |= ICE_AQ_PHY_ENA_LINK;
+
+ /* check if there is a PHY type for the requested advertised speed */
+ if (!(phy_type_low || phy_type_high)) {
+ netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+
+ /* intersect requested advertised speed PHY types with media PHY types
+ * for set PHY configuration
+ */
+ config.phy_type_high = cpu_to_le64(phy_type_high) &
+ phy_caps->phy_type_high;
+ config.phy_type_low = cpu_to_le64(phy_type_low) &
+ phy_caps->phy_type_low;
+
+ if (!(config.phy_type_high || config.phy_type_low)) {
+ /* If there is no intersection and lenient mode is enabled, then
+ * intersect the requested advertised speed with NVM media type
+ * PHY types.
+ */
+ if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) {
+ config.phy_type_high = cpu_to_le64(phy_type_high) &
+ pf->nvm_phy_type_hi;
+ config.phy_type_low = cpu_to_le64(phy_type_low) &
+ pf->nvm_phy_type_lo;
+ } else {
+ netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+ }
+
+	/* If link is up, put the link down */
+ if (pi->phy.link_info.link_info & ICE_AQ_LINK_UP) {
+ /* Tell the OS link is going down, the link will go
+ * back up when fw says it is ready asynchronously
+ */
+ ice_print_link_msg(np->vsi, false);
+ netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+ }
+
+ /* make the aq call */
+ err = ice_aq_set_phy_cfg(&pf->hw, pi, &config, NULL);
+ if (err) {
+		netdev_info(netdev, "Set phy config failed\n");
+ goto done;
+ }
+
+ /* Save speed request */
+ pi->phy.curr_user_speed_req = adv_link_speed;
+done:
+ kfree(phy_caps);
+ clear_bit(ICE_CFG_BUSY, pf->state);
+
+ return err;
+}
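
To make the speed-to-PHY-type intersection above concrete, here is a minimal sketch with made-up masks (the values are illustrative, not real ICE_PHY_TYPE_LOW_* bits):

u64 requested_low = 0x30000ULL;	/* made-up bits for two 25G modes */
u64 media_low = 0x10000ULL;	/* made-up media capability mask */
u64 cfg_low = requested_low & media_low;	/* 0x10000: one mode survives */

/* cfg_low == 0 would mean no PHY type is supported by both the request
 * and the media; the function above then retries against the NVM PHY
 * types in lenient mode, or fails with -EOPNOTSUPP.
 */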
+
+/**
+ * ice_parse_hdrs - parses headers from RSS hash input
+ * @nfc: ethtool rxnfc command
+ *
+ * This function parses the rxnfc command and returns intended
+ * header types for RSS configuration
+ */
+static u32 ice_parse_hdrs(struct ethtool_rxnfc *nfc)
+{
+ u32 hdrs = ICE_FLOW_SEG_HDR_NONE;
+
+ switch (nfc->flow_type) {
+ case TCP_V4_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4;
+ break;
+ case UDP_V4_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4;
+ break;
+ case SCTP_V4_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4;
+ break;
+ case TCP_V6_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6;
+ break;
+ case UDP_V6_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6;
+ break;
+ case SCTP_V6_FLOW:
+ hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6;
+ break;
+ default:
+ break;
+ }
+ return hdrs;
+}
+
+#define ICE_FLOW_HASH_FLD_IPV4_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)
+#define ICE_FLOW_HASH_FLD_IPV6_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)
+#define ICE_FLOW_HASH_FLD_IPV4_DA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)
+#define ICE_FLOW_HASH_FLD_IPV6_DA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)
+#define ICE_FLOW_HASH_FLD_TCP_SRC_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)
+#define ICE_FLOW_HASH_FLD_TCP_DST_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)
+#define ICE_FLOW_HASH_FLD_UDP_SRC_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)
+#define ICE_FLOW_HASH_FLD_UDP_DST_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)
+#define ICE_FLOW_HASH_FLD_SCTP_SRC_PORT \
+ BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)
+#define ICE_FLOW_HASH_FLD_SCTP_DST_PORT \
+ BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)
+
+/**
+ * ice_parse_hash_flds - parses hash fields from RSS hash input
+ * @nfc: ethtool rxnfc command
+ *
+ * This function parses the rxnfc command and returns intended
+ * hash fields for RSS configuration
+ */
+static u64 ice_parse_hash_flds(struct ethtool_rxnfc *nfc)
+{
+ u64 hfld = ICE_HASH_INVALID;
+
+ if (nfc->data & RXH_IP_SRC || nfc->data & RXH_IP_DST) {
+ switch (nfc->flow_type) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ if (nfc->data & RXH_IP_SRC)
+ hfld |= ICE_FLOW_HASH_FLD_IPV4_SA;
+ if (nfc->data & RXH_IP_DST)
+ hfld |= ICE_FLOW_HASH_FLD_IPV4_DA;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ if (nfc->data & RXH_IP_SRC)
+ hfld |= ICE_FLOW_HASH_FLD_IPV6_SA;
+ if (nfc->data & RXH_IP_DST)
+ hfld |= ICE_FLOW_HASH_FLD_IPV6_DA;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (nfc->data & RXH_L4_B_0_1 || nfc->data & RXH_L4_B_2_3) {
+ switch (nfc->flow_type) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ if (nfc->data & RXH_L4_B_0_1)
+ hfld |= ICE_FLOW_HASH_FLD_TCP_SRC_PORT;
+ if (nfc->data & RXH_L4_B_2_3)
+ hfld |= ICE_FLOW_HASH_FLD_TCP_DST_PORT;
+ break;
+ case UDP_V4_FLOW:
+ case UDP_V6_FLOW:
+ if (nfc->data & RXH_L4_B_0_1)
+ hfld |= ICE_FLOW_HASH_FLD_UDP_SRC_PORT;
+ if (nfc->data & RXH_L4_B_2_3)
+ hfld |= ICE_FLOW_HASH_FLD_UDP_DST_PORT;
+ break;
+ case SCTP_V4_FLOW:
+ case SCTP_V6_FLOW:
+ if (nfc->data & RXH_L4_B_0_1)
+ hfld |= ICE_FLOW_HASH_FLD_SCTP_SRC_PORT;
+ if (nfc->data & RXH_L4_B_2_3)
+ hfld |= ICE_FLOW_HASH_FLD_SCTP_DST_PORT;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return hfld;
+}
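
As a concrete illustration of what the two parsers above decompose: the request below is what `ethtool -N <dev> rx-flow-hash tcp4 sdfn` delivers (s/d/f/n select the four RXH_* bits); the expected results assume the ICE_FLOW_* definitions above and that ICE_HASH_INVALID is zero:

struct ethtool_rxnfc nfc = {
	.cmd = ETHTOOL_SRXFH,
	.flow_type = TCP_V4_FLOW,
	.data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3,
};

/* ice_parse_hdrs(&nfc)      -> ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4
 * ice_parse_hash_flds(&nfc) -> ICE_FLOW_HASH_FLD_IPV4_SA |
 *                              ICE_FLOW_HASH_FLD_IPV4_DA |
 *                              ICE_FLOW_HASH_FLD_TCP_SRC_PORT |
 *                              ICE_FLOW_HASH_FLD_TCP_DST_PORT
 */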
+
+/**
+ * ice_set_rss_hash_opt - Enable/Disable flow types for RSS hash
+ * @vsi: the VSI being configured
+ * @nfc: ethtool rxnfc command
+ *
+ * Returns 0 if the flow input set is supported, negative error code otherwise.
+ */
+static int
+ice_set_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ u64 hashed_flds;
+ int status;
+ u32 hdrs;
+
+ dev = ice_pf_to_dev(pf);
+ if (ice_is_safe_mode(pf)) {
+ dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+ vsi->vsi_num);
+ return -EINVAL;
+ }
+
+ hashed_flds = ice_parse_hash_flds(nfc);
+ if (hashed_flds == ICE_HASH_INVALID) {
+ dev_dbg(dev, "Invalid hash fields, vsi num = %d\n",
+ vsi->vsi_num);
+ return -EINVAL;
+ }
+
+ hdrs = ice_parse_hdrs(nfc);
+ if (hdrs == ICE_FLOW_SEG_HDR_NONE) {
+ dev_dbg(dev, "Header type is not valid, vsi num = %d\n",
+ vsi->vsi_num);
+ return -EINVAL;
+ }
+
+ status = ice_add_rss_cfg(&pf->hw, vsi->idx, hashed_flds, hdrs);
+ if (status) {
+ dev_dbg(dev, "ice_add_rss_cfg failed, vsi num = %d, error = %d\n",
+ vsi->vsi_num, status);
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_get_rss_hash_opt - Retrieve hash fields for a given flow-type
+ * @vsi: the VSI being configured
+ * @nfc: ethtool rxnfc command
+ */
+static void
+ice_get_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ u64 hash_flds;
+ u32 hdrs;
+
+ dev = ice_pf_to_dev(pf);
+
+ nfc->data = 0;
+ if (ice_is_safe_mode(pf)) {
+ dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+ vsi->vsi_num);
+ return;
+ }
+
+ hdrs = ice_parse_hdrs(nfc);
+ if (hdrs == ICE_FLOW_SEG_HDR_NONE) {
+ dev_dbg(dev, "Header type is not valid, vsi num = %d\n",
+ vsi->vsi_num);
+ return;
+ }
+
+ hash_flds = ice_get_rss_cfg(&pf->hw, vsi->idx, hdrs);
+ if (hash_flds == ICE_HASH_INVALID) {
+ dev_dbg(dev, "No hash fields found for the given header type, vsi num = %d\n",
+ vsi->vsi_num);
+ return;
+ }
+
+ if (hash_flds & ICE_FLOW_HASH_FLD_IPV4_SA ||
+ hash_flds & ICE_FLOW_HASH_FLD_IPV6_SA)
+ nfc->data |= (u64)RXH_IP_SRC;
+
+ if (hash_flds & ICE_FLOW_HASH_FLD_IPV4_DA ||
+ hash_flds & ICE_FLOW_HASH_FLD_IPV6_DA)
+ nfc->data |= (u64)RXH_IP_DST;
+
+ if (hash_flds & ICE_FLOW_HASH_FLD_TCP_SRC_PORT ||
+ hash_flds & ICE_FLOW_HASH_FLD_UDP_SRC_PORT ||
+ hash_flds & ICE_FLOW_HASH_FLD_SCTP_SRC_PORT)
+ nfc->data |= (u64)RXH_L4_B_0_1;
+
+ if (hash_flds & ICE_FLOW_HASH_FLD_TCP_DST_PORT ||
+ hash_flds & ICE_FLOW_HASH_FLD_UDP_DST_PORT ||
+ hash_flds & ICE_FLOW_HASH_FLD_SCTP_DST_PORT)
+ nfc->data |= (u64)RXH_L4_B_2_3;
+}
+
+/**
+ * ice_set_rxnfc - command to set Rx flow rules.
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns 0 for success and negative values for errors
+ */
+static int ice_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ return ice_add_fdir_ethtool(vsi, cmd);
+ case ETHTOOL_SRXCLSRLDEL:
+ return ice_del_fdir_ethtool(vsi, cmd);
+ case ETHTOOL_SRXFH:
+ return ice_set_rss_hash_opt(vsi, cmd);
+ default:
+ break;
+ }
+ return -EOPNOTSUPP;
+}
+
+/**
+ * ice_get_rxnfc - command to get Rx flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: buffer to return Rx flow classification rules
+ *
+ * Returns 0 if the command is supported, -EOPNOTSUPP otherwise.
+ */
+static int
+ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+ u32 __always_unused *rule_locs)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ int ret = -EOPNOTSUPP;
+ struct ice_hw *hw;
+
+ hw = &vsi->back->hw;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = vsi->rss_size;
+ ret = 0;
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ cmd->rule_cnt = hw->fdir_active_fltr;
+ /* report total rule count */
+ cmd->data = ice_get_fdir_cnt_all(hw);
+ ret = 0;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ ret = ice_get_ethtool_fdir_entry(hw, cmd);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ ret = ice_get_fdir_fltr_ids(hw, cmd, (u32 *)rule_locs);
+ break;
+ case ETHTOOL_GRXFH:
+ ice_get_rss_hash_opt(vsi, cmd);
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void
+ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ ring->rx_max_pending = ICE_MAX_NUM_DESC;
+ ring->tx_max_pending = ICE_MAX_NUM_DESC;
+ ring->rx_pending = vsi->rx_rings[0]->count;
+ ring->tx_pending = vsi->tx_rings[0]->count;
+
+ /* Rx mini and jumbo rings are not supported */
+ ring->rx_mini_max_pending = 0;
+ ring->rx_jumbo_max_pending = 0;
+ ring->rx_mini_pending = 0;
+ ring->rx_jumbo_pending = 0;
+}
+
+static int
+ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_tx_ring *xdp_rings = NULL;
+ struct ice_tx_ring *tx_rings = NULL;
+ struct ice_rx_ring *rx_rings = NULL;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ int i, timeout = 50, err = 0;
+ u16 new_rx_cnt, new_tx_cnt;
+
+ if (ring->tx_pending > ICE_MAX_NUM_DESC ||
+ ring->tx_pending < ICE_MIN_NUM_DESC ||
+ ring->rx_pending > ICE_MAX_NUM_DESC ||
+ ring->rx_pending < ICE_MIN_NUM_DESC) {
+ netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n",
+ ring->tx_pending, ring->rx_pending,
+ ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC,
+ ICE_REQ_DESC_MULTIPLE);
+ return -EINVAL;
+ }
+
+ new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE);
+ if (new_tx_cnt != ring->tx_pending)
+ netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n",
+ new_tx_cnt);
+ new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE);
+ if (new_rx_cnt != ring->rx_pending)
+ netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n",
+ new_rx_cnt);
+
+	/* if there is nothing to do, return success */
+ if (new_tx_cnt == vsi->tx_rings[0]->count &&
+ new_rx_cnt == vsi->rx_rings[0]->count) {
+ netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
+ return 0;
+ }
+
+	/* If there is an AF_XDP UMEM attached to any of the Rx rings,
+	 * disallow changing the number of descriptors -- regardless of
+	 * whether the netdev is running or not.
+	 */
+ if (ice_xsk_any_rx_ring_ena(vsi))
+ return -EBUSY;
+
+ while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ usleep_range(1000, 2000);
+ }
+
+ /* set for the next time the netdev is started */
+ if (!netif_running(vsi->netdev)) {
+ ice_for_each_alloc_txq(vsi, i)
+ vsi->tx_rings[i]->count = new_tx_cnt;
+ ice_for_each_alloc_rxq(vsi, i)
+ vsi->rx_rings[i]->count = new_rx_cnt;
+ if (ice_is_xdp_ena_vsi(vsi))
+ ice_for_each_xdp_txq(vsi, i)
+ vsi->xdp_rings[i]->count = new_tx_cnt;
+ vsi->num_tx_desc = (u16)new_tx_cnt;
+ vsi->num_rx_desc = (u16)new_rx_cnt;
+ netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
+ goto done;
+ }
+
+ if (new_tx_cnt == vsi->tx_rings[0]->count)
+ goto process_rx;
+
+ /* alloc updated Tx resources */
+ netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n",
+ vsi->tx_rings[0]->count, new_tx_cnt);
+
+ tx_rings = kcalloc(vsi->num_txq, sizeof(*tx_rings), GFP_KERNEL);
+ if (!tx_rings) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ ice_for_each_txq(vsi, i) {
+ /* clone ring and setup updated count */
+ tx_rings[i] = *vsi->tx_rings[i];
+ tx_rings[i].count = new_tx_cnt;
+ tx_rings[i].desc = NULL;
+ tx_rings[i].tx_buf = NULL;
+ tx_rings[i].tx_tstamps = &pf->ptp.port.tx;
+ err = ice_setup_tx_ring(&tx_rings[i]);
+ if (err) {
+ while (i--)
+ ice_clean_tx_ring(&tx_rings[i]);
+ kfree(tx_rings);
+ goto done;
+ }
+ }
+
+ if (!ice_is_xdp_ena_vsi(vsi))
+ goto process_rx;
+
+ /* alloc updated XDP resources */
+ netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
+ vsi->xdp_rings[0]->count, new_tx_cnt);
+
+ xdp_rings = kcalloc(vsi->num_xdp_txq, sizeof(*xdp_rings), GFP_KERNEL);
+ if (!xdp_rings) {
+ err = -ENOMEM;
+ goto free_tx;
+ }
+
+ ice_for_each_xdp_txq(vsi, i) {
+ /* clone ring and setup updated count */
+ xdp_rings[i] = *vsi->xdp_rings[i];
+ xdp_rings[i].count = new_tx_cnt;
+ xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
+ xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
+ xdp_rings[i].desc = NULL;
+ xdp_rings[i].tx_buf = NULL;
+ err = ice_setup_tx_ring(&xdp_rings[i]);
+ if (err) {
+ while (i--)
+ ice_clean_tx_ring(&xdp_rings[i]);
+ kfree(xdp_rings);
+ goto free_tx;
+ }
+ ice_set_ring_xdp(&xdp_rings[i]);
+ }
+
+process_rx:
+ if (new_rx_cnt == vsi->rx_rings[0]->count)
+ goto process_link;
+
+ /* alloc updated Rx resources */
+ netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n",
+ vsi->rx_rings[0]->count, new_rx_cnt);
+
+ rx_rings = kcalloc(vsi->num_rxq, sizeof(*rx_rings), GFP_KERNEL);
+ if (!rx_rings) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ ice_for_each_rxq(vsi, i) {
+ /* clone ring and setup updated count */
+ rx_rings[i] = *vsi->rx_rings[i];
+ rx_rings[i].count = new_rx_cnt;
+ rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
+ rx_rings[i].desc = NULL;
+ rx_rings[i].rx_buf = NULL;
+ /* this is to allow wr32 to have something to write to
+ * during early allocation of Rx buffers
+ */
+ rx_rings[i].tail = vsi->back->hw.hw_addr + PRTGEN_STATUS;
+
+ err = ice_setup_rx_ring(&rx_rings[i]);
+ if (err)
+ goto rx_unwind;
+
+ /* allocate Rx buffers */
+ err = ice_alloc_rx_bufs(&rx_rings[i],
+ ICE_DESC_UNUSED(&rx_rings[i]));
+rx_unwind:
+ if (err) {
+ while (i) {
+ i--;
+ ice_free_rx_ring(&rx_rings[i]);
+ }
+ kfree(rx_rings);
+ err = -ENOMEM;
+ goto free_tx;
+ }
+ }
+
+process_link:
+	/* Bring the interface down, copy in the new ring info, then restore
+	 * the interface. If the VSI is up, bring it down and then back up
+ */
+ if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
+ ice_down(vsi);
+
+ if (tx_rings) {
+ ice_for_each_txq(vsi, i) {
+ ice_free_tx_ring(vsi->tx_rings[i]);
+ *vsi->tx_rings[i] = tx_rings[i];
+ }
+ kfree(tx_rings);
+ }
+
+ if (rx_rings) {
+ ice_for_each_rxq(vsi, i) {
+ ice_free_rx_ring(vsi->rx_rings[i]);
+ /* copy the real tail offset */
+ rx_rings[i].tail = vsi->rx_rings[i]->tail;
+ /* this is to fake out the allocation routine
+ * into thinking it has to realloc everything
+ * but the recycling logic will let us re-use
+ * the buffers allocated above
+ */
+ rx_rings[i].next_to_use = 0;
+ rx_rings[i].next_to_clean = 0;
+ rx_rings[i].next_to_alloc = 0;
+ *vsi->rx_rings[i] = rx_rings[i];
+ }
+ kfree(rx_rings);
+ }
+
+ if (xdp_rings) {
+ ice_for_each_xdp_txq(vsi, i) {
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ *vsi->xdp_rings[i] = xdp_rings[i];
+ }
+ kfree(xdp_rings);
+ }
+
+ vsi->num_tx_desc = new_tx_cnt;
+ vsi->num_rx_desc = new_rx_cnt;
+ ice_up(vsi);
+ }
+ goto done;
+
+free_tx:
+ /* error cleanup if the Rx allocations failed after getting Tx */
+ if (tx_rings) {
+ ice_for_each_txq(vsi, i)
+ ice_free_tx_ring(&tx_rings[i]);
+ kfree(tx_rings);
+ }
+
+done:
+ clear_bit(ICE_CFG_BUSY, pf->state);
+ return err;
+}
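
The descriptor-count rounding near the top of ice_set_ringparam() is plain power-of-two alignment. A standalone sketch, assuming ICE_REQ_DESC_MULTIPLE is 32 (its value elsewhere in this driver):

#define REQ_DESC_MULTIPLE 32u	/* assumed value of ICE_REQ_DESC_MULTIPLE */

/* same arithmetic as the kernel's ALIGN() for a power-of-two multiple */
static unsigned int round_desc_count(unsigned int requested)
{
	return (requested + REQ_DESC_MULTIPLE - 1) & ~(REQ_DESC_MULTIPLE - 1);
}

/* round_desc_count(500) == 512 -> "rounded up to 512" is logged;
 * round_desc_count(512) == 512 -> already aligned, used silently.
 */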
+
+/**
+ * ice_get_pauseparam - Get Flow Control status
+ * @netdev: network interface device structure
+ * @pause: ethernet pause (flow control) parameters
+ *
+ * Get requested flow control status from PHY capability.
+ * If autoneg is true, then ethtool will send the ETHTOOL_GSET ioctl which
+ * is handled by ice_get_link_ksettings. ice_get_link_ksettings will report
+ * the negotiated Rx/Tx pause via lp_advertising.
+ */
+static void
+ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_port_info *pi = np->vsi->port_info;
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_dcbx_cfg *dcbx_cfg;
+ int status;
+
+ /* Initialize pause params */
+ pause->rx_pause = 0;
+ pause->tx_pause = 0;
+
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return;
+
+ /* Get current PHY config */
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+ NULL);
+ if (status)
+ goto out;
+
+ pause->autoneg = ice_is_phy_caps_an_enabled(pcaps) ? AUTONEG_ENABLE :
+ AUTONEG_DISABLE;
+
+ if (dcbx_cfg->pfc.pfcena)
+ /* PFC enabled so report LFC as off */
+ goto out;
+
+ if (pcaps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
+ pause->tx_pause = 1;
+ if (pcaps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+ pause->rx_pause = 1;
+
+out:
+ kfree(pcaps);
+}
+
+/**
+ * ice_set_pauseparam - Set Flow Control parameter
+ * @netdev: network interface device structure
+ * @pause: requested Tx/Rx flow control parameters
+ */
+static int
+ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_link_status *hw_link_info;
+ struct ice_pf *pf = np->vsi->back;
+ struct ice_dcbx_cfg *dcbx_cfg;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_port_info *pi;
+ u8 aq_failures;
+ bool link_up;
+ u32 is_an;
+ int err;
+
+ pi = vsi->port_info;
+ hw_link_info = &pi->phy.link_info;
+ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+ link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+
+ /* Changing the port's flow control is not supported if this isn't the
+ * PF VSI
+ */
+ if (vsi->type != ICE_VSI_PF) {
+ netdev_info(netdev, "Changing flow control parameters only supported for PF VSI\n");
+ return -EOPNOTSUPP;
+ }
+
+	/* With ETHTOOL_GLINKSETTINGS defined, get pause param reports the
+	 * configured and negotiated flow control pause, and pause->autoneg
+	 * reports the SW-configured setting. Compare pause->autoneg against
+	 * the SW-configured value to prevent the user from using set pause
+	 * param to change autoneg.
+ */
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ /* Get current PHY config */
+ err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+ NULL);
+ if (err) {
+ kfree(pcaps);
+ return err;
+ }
+
+ is_an = ice_is_phy_caps_an_enabled(pcaps) ? AUTONEG_ENABLE :
+ AUTONEG_DISABLE;
+
+ kfree(pcaps);
+
+ if (pause->autoneg != is_an) {
+ netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* If we have link and don't have autoneg */
+ if (!test_bit(ICE_DOWN, pf->state) &&
+ !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
+		/* Send message that it might not necessarily work */
+ netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
+ }
+
+ if (dcbx_cfg->pfc.pfcena) {
+ netdev_info(netdev, "Priority flow control enabled. Cannot set link flow control.\n");
+ return -EOPNOTSUPP;
+ }
+ if (pause->rx_pause && pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_FULL;
+ else if (pause->rx_pause && !pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_RX_PAUSE;
+ else if (!pause->rx_pause && pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_TX_PAUSE;
+ else if (!pause->rx_pause && !pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_NONE;
+ else
+ return -EINVAL;
+
+ /* Set the FC mode and only restart AN if link is up */
+ err = ice_set_fc(pi, &aq_failures, link_up);
+
+ if (aq_failures & ICE_SET_FC_AQ_FAIL_GET) {
+ netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ err = -EAGAIN;
+ } else if (aq_failures & ICE_SET_FC_AQ_FAIL_SET) {
+ netdev_info(netdev, "Set fc failed on the set_phy_config call with err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ err = -EAGAIN;
+ } else if (aq_failures & ICE_SET_FC_AQ_FAIL_UPDATE) {
+ netdev_info(netdev, "Set fc failed on the get_link_info call with err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ err = -EAGAIN;
+ }
+
+ return err;
+}
+
+/**
+ * ice_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the key size.
+ */
+static u32 ice_get_rxfh_key_size(struct net_device __always_unused *netdev)
+{
+ return ICE_VSIQF_HKEY_ARRAY_SIZE;
+}
+
+/**
+ * ice_get_rxfh_indir_size - get the Rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ */
+static u32 ice_get_rxfh_indir_size(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ return np->vsi->rss_table_size;
+}
+
+static int
+ice_get_rxfh_context(struct net_device *netdev, u32 *indir,
+ u8 *key, u8 *hfunc, u32 rss_context)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ u16 qcount, offset;
+ int err, num_tc, i;
+ u8 *lut;
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ netdev_warn(netdev, "RSS is not supported on this VSI!\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (rss_context && !ice_is_adq_active(pf)) {
+ netdev_err(netdev, "RSS context cannot be non-zero when ADQ is not configured.\n");
+ return -EINVAL;
+ }
+
+ qcount = vsi->mqprio_qopt.qopt.count[rss_context];
+ offset = vsi->mqprio_qopt.qopt.offset[rss_context];
+
+ if (rss_context && ice_is_adq_active(pf)) {
+ num_tc = vsi->mqprio_qopt.qopt.num_tc;
+ if (rss_context >= num_tc) {
+ netdev_err(netdev, "RSS context:%d > num_tc:%d\n",
+ rss_context, num_tc);
+ return -EINVAL;
+ }
+ /* Use channel VSI of given TC */
+ vsi = vsi->tc_map_vsi[rss_context];
+ }
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP;
+
+ if (!indir)
+ return 0;
+
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ err = ice_get_rss_key(vsi, key);
+ if (err)
+ goto out;
+
+ err = ice_get_rss_lut(vsi, lut, vsi->rss_table_size);
+ if (err)
+ goto out;
+
+ if (ice_is_adq_active(pf)) {
+ for (i = 0; i < vsi->rss_table_size; i++)
+ indir[i] = offset + lut[i] % qcount;
+ goto out;
+ }
+
+ for (i = 0; i < vsi->rss_table_size; i++)
+ indir[i] = lut[i];
+
+out:
+ kfree(lut);
+ return err;
+}
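
A worked example of the ADQ remap in the loop above, with hypothetical numbers:

/* Suppose TC 1 owns four queues starting at queue 8, so offset == 8
 * and qcount == 4. A raw hardware LUT entry of 5 is then reported as
 *
 *	indir[i] = 8 + (5 % 4) == 9
 *
 * i.e. every reported entry is folded into that TC's range [8..11].
 */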
+
+/**
+ * ice_get_rxfh - get the Rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Reads the indirection table directly from the hardware.
+ */
+static int
+ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
+{
+ return ice_get_rxfh_context(netdev, indir, key, hfunc, 0);
+}
+
+/**
+ * ice_set_rxfh - set the Rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Returns -EINVAL if the table specifies an invalid queue ID, otherwise
+ * returns 0 after programming the table.
+ */
+static int
+ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
+ const u8 hfunc)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ int err;
+
+ dev = ice_pf_to_dev(pf);
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+ return -EOPNOTSUPP;
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ /* RSS not supported return error here */
+ netdev_warn(netdev, "RSS is not configured on this VSI!\n");
+ return -EIO;
+ }
+
+ if (ice_is_adq_active(pf)) {
+ netdev_err(netdev, "Cannot change RSS params with ADQ configured.\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (key) {
+ if (!vsi->rss_hkey_user) {
+ vsi->rss_hkey_user =
+ devm_kzalloc(dev, ICE_VSIQF_HKEY_ARRAY_SIZE,
+ GFP_KERNEL);
+ if (!vsi->rss_hkey_user)
+ return -ENOMEM;
+ }
+ memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE);
+
+ err = ice_set_rss_key(vsi, vsi->rss_hkey_user);
+ if (err)
+ return err;
+ }
+
+ if (!vsi->rss_lut_user) {
+ vsi->rss_lut_user = devm_kzalloc(dev, vsi->rss_table_size,
+ GFP_KERNEL);
+ if (!vsi->rss_lut_user)
+ return -ENOMEM;
+ }
+
+	/* Each 32-bit value pointed to by 'indir' is stored as a LUT entry */
+ if (indir) {
+ int i;
+
+ for (i = 0; i < vsi->rss_table_size; i++)
+ vsi->rss_lut_user[i] = (u8)(indir[i]);
+ } else {
+ ice_fill_rss_lut(vsi->rss_lut_user, vsi->rss_table_size,
+ vsi->rss_size);
+ }
+
+ err = ice_set_rss_lut(vsi, vsi->rss_lut_user, vsi->rss_table_size);
+ if (err)
+ return err;
+
+ return 0;
+}
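
When no indirection table is supplied, ice_fill_rss_lut() (defined elsewhere in the driver) regenerates the default spread. A sketch of the round-robin pattern it is expected to produce, under that assumption:

/* e.g. rss_table_size == 8, rss_size == 3 yields 0 1 2 0 1 2 0 1 */
static void fill_rss_lut_sketch(u8 *lut, u16 rss_table_size, u16 rss_size)
{
	u16 i;

	for (i = 0; i < rss_table_size; i++)
		lut[i] = i % rss_size;
}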
+
+static int
+ice_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+ /* only report timestamping if PTP is enabled */
+ if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ return ethtool_op_get_ts_info(dev, info);
+
+ info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->phc_index = ice_get_ptp_clock_index(pf);
+
+ info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
+
+ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
+
+ return 0;
+}
+
+/**
+ * ice_get_max_txq - return the maximum number of Tx queues in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_txq(struct ice_pf *pf)
+{
+ return min3(pf->num_lan_msix, (u16)num_online_cpus(),
+ (u16)pf->hw.func_caps.common_cap.num_txq);
+}
+
+/**
+ * ice_get_max_rxq - return the maximum number of Rx queues in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_rxq(struct ice_pf *pf)
+{
+ return min3(pf->num_lan_msix, (u16)num_online_cpus(),
+ (u16)pf->hw.func_caps.common_cap.num_rxq);
+}
+
+/**
+ * ice_get_combined_cnt - return the current number of combined channels
+ * @vsi: PF VSI pointer
+ *
+ * Go through all queue vectors and count the ones that have both an Rx and
+ * a Tx ring attached
+ */
+static u32 ice_get_combined_cnt(struct ice_vsi *vsi)
+{
+ u32 combined = 0;
+ int q_idx;
+
+ ice_for_each_q_vector(vsi, q_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.rx_ring && q_vector->tx.tx_ring)
+ combined++;
+ }
+
+ return combined;
+}
+
+/**
+ * ice_get_channels - get the current and max supported channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static void
+ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+
+ /* report maximum channels */
+ ch->max_rx = ice_get_max_rxq(pf);
+ ch->max_tx = ice_get_max_txq(pf);
+ ch->max_combined = min_t(int, ch->max_rx, ch->max_tx);
+
+ /* report current channels */
+ ch->combined_count = ice_get_combined_cnt(vsi);
+ ch->rx_count = vsi->num_rxq - ch->combined_count;
+ ch->tx_count = vsi->num_txq - ch->combined_count;
+
+ /* report other queues */
+ ch->other_count = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0;
+ ch->max_other = ch->other_count;
+}
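
A quick worked example of the channel arithmetic above:

/* Hypothetical VSI with 8 Rx and 8 Tx queues, each Rx/Tx pair sharing
 * one vector: ice_get_combined_cnt() returns 8, so
 *
 *	combined_count = 8
 *	rx_count       = 8 - 8 == 0
 *	tx_count       = 8 - 8 == 0
 *
 * which `ethtool -l <dev>` displays as "Combined: 8".
 */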
+
+/**
+ * ice_get_valid_rss_size - return valid number of RSS queues
+ * @hw: pointer to the HW structure
+ * @new_size: requested RSS queues
+ */
+static int ice_get_valid_rss_size(struct ice_hw *hw, int new_size)
+{
+ struct ice_hw_common_caps *caps = &hw->func_caps.common_cap;
+
+ return min_t(int, new_size, BIT(caps->rss_table_entry_width));
+}
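
The clamp above follows from the LUT entry width: an n-bit LUT entry can only address BIT(n) queues. With a hypothetical width:

/* rss_table_entry_width == 7 -> 7-bit LUT entries -> at most
 * BIT(7) == 128 RSS queues, so a request for 256 comes back as 128.
 */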
+
+/**
+ * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size
+ * @vsi: VSI to reconfigure RSS LUT on
+ * @req_rss_size: requested range of queue numbers for hashing
+ *
+ * Set the VSI's RSS parameters, configure the RSS LUT based on these.
+ */
+static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ struct ice_hw *hw;
+ int err;
+ u8 *lut;
+
+ dev = ice_pf_to_dev(pf);
+ hw = &pf->hw;
+
+ if (!req_rss_size)
+ return -EINVAL;
+
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ /* set RSS LUT parameters */
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ vsi->rss_size = 1;
+ else
+ vsi->rss_size = ice_get_valid_rss_size(hw, req_rss_size);
+
+ /* create/set RSS LUT */
+ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+ err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
+ if (err)
+ dev_err(dev, "Cannot set RSS lut, err %d aq_err %s\n", err,
+ ice_aq_str(hw->adminq.sq_last_status));
+
+ kfree(lut);
+ return err;
+}
+
+/**
+ * ice_set_channels - set the number of channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ int new_rx = 0, new_tx = 0;
+ bool locked = false;
+ u32 curr_combined;
+ int ret = 0;
+
+ /* do not support changing channels in Safe Mode */
+ if (ice_is_safe_mode(pf)) {
+ netdev_err(dev, "Changing channel in Safe Mode is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ /* do not support changing other_count */
+ if (ch->other_count != (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1U : 0U))
+ return -EINVAL;
+
+ if (ice_is_adq_active(pf)) {
+ netdev_err(dev, "Cannot set channels with ADQ configured.\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (test_bit(ICE_FLAG_FD_ENA, pf->flags) && pf->hw.fdir_active_fltr) {
+ netdev_err(dev, "Cannot set channels when Flow Director filters are active\n");
+ return -EOPNOTSUPP;
+ }
+
+ curr_combined = ice_get_combined_cnt(vsi);
+
+	/* these checks are for cases where the user didn't specify a
+	 * particular value on the command line but we get a non-zero value
+	 * anyway via get_channels(); see ethtool.c in the ethtool repository
+	 * (the userspace part), particularly the do_schannels() routine
+ */
+ if (ch->rx_count == vsi->num_rxq - curr_combined)
+ ch->rx_count = 0;
+ if (ch->tx_count == vsi->num_txq - curr_combined)
+ ch->tx_count = 0;
+ if (ch->combined_count == curr_combined)
+ ch->combined_count = 0;
+
+ if (!(ch->combined_count || (ch->rx_count && ch->tx_count))) {
+ netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n");
+ return -EINVAL;
+ }
+
+ new_rx = ch->combined_count + ch->rx_count;
+ new_tx = ch->combined_count + ch->tx_count;
+
+ if (new_rx < vsi->tc_cfg.numtc) {
+		netdev_err(dev, "Cannot set fewer Rx channels than the number of Traffic Classes (%u)\n",
+ vsi->tc_cfg.numtc);
+ return -EINVAL;
+ }
+ if (new_tx < vsi->tc_cfg.numtc) {
+		netdev_err(dev, "Cannot set fewer Tx channels than the number of Traffic Classes (%u)\n",
+ vsi->tc_cfg.numtc);
+ return -EINVAL;
+ }
+ if (new_rx > ice_get_max_rxq(pf)) {
+ netdev_err(dev, "Maximum allowed Rx channels is %d\n",
+ ice_get_max_rxq(pf));
+ return -EINVAL;
+ }
+ if (new_tx > ice_get_max_txq(pf)) {
+ netdev_err(dev, "Maximum allowed Tx channels is %d\n",
+ ice_get_max_txq(pf));
+ return -EINVAL;
+ }
+
+ if (pf->adev) {
+ mutex_lock(&pf->adev_mutex);
+ device_lock(&pf->adev->dev);
+ locked = true;
+ if (pf->adev->dev.driver) {
+ netdev_err(dev, "Cannot change channels when RDMA is active\n");
+ ret = -EBUSY;
+ goto adev_unlock;
+ }
+ }
+
+ ice_vsi_recfg_qs(vsi, new_rx, new_tx, locked);
+
+ if (!netif_is_rxfh_configured(dev)) {
+ ret = ice_vsi_set_dflt_rss_lut(vsi, new_rx);
+ goto adev_unlock;
+ }
+
+ /* Update rss_size due to change in Rx queues */
+ vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
+
+adev_unlock:
+ if (locked) {
+ device_unlock(&pf->adev->dev);
+ mutex_unlock(&pf->adev_mutex);
+ }
+ return ret;
+}
+
+/**
+ * ice_get_wol - get current Wake on LAN configuration
+ * @netdev: network interface device structure
+ * @wol: Ethtool structure to retrieve WoL settings
+ */
+static void ice_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+ if (np->vsi->type != ICE_VSI_PF)
+ netdev_warn(netdev, "Wake on LAN is not supported on this interface!\n");
+
+ /* Get WoL settings based on the HW capability */
+ if (ice_is_wol_supported(&pf->hw)) {
+ wol->supported = WAKE_MAGIC;
+ wol->wolopts = pf->wol_ena ? WAKE_MAGIC : 0;
+ } else {
+ wol->supported = 0;
+ wol->wolopts = 0;
+ }
+}
+
+/**
+ * ice_set_wol - set Wake on LAN on supported device
+ * @netdev: network interface device structure
+ * @wol: Ethtool structure to set WoL
+ */
+static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+
+ if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(&pf->hw))
+ return -EOPNOTSUPP;
+
+ /* only magic packet is supported */
+ if (wol->wolopts && wol->wolopts != WAKE_MAGIC)
+ return -EOPNOTSUPP;
+
+ /* Set WoL only if there is a new value */
+ if (pf->wol_ena != !!wol->wolopts) {
+ pf->wol_ena = !!wol->wolopts;
+ device_set_wakeup_enable(ice_pf_to_dev(pf), pf->wol_ena);
+ netdev_dbg(netdev, "WoL magic packet %sabled\n",
+ pf->wol_ena ? "en" : "dis");
+ }
+
+ return 0;
+}
+
+/**
+ * ice_get_rc_coalesce - get ITR values for specific ring container
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @rc: ring container that the ITR values will come from
+ *
+ * Query the device for ice_ring_container specific ITR values. This is
+ * done per ice_ring_container because each q_vector can have 1 or more rings
+ * and all of said ring(s) will have the same ITR values.
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int
+ice_get_rc_coalesce(struct ethtool_coalesce *ec, struct ice_ring_container *rc)
+{
+ if (!rc->rx_ring)
+ return -EINVAL;
+
+ switch (rc->type) {
+ case ICE_RX_CONTAINER:
+ ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc);
+ ec->rx_coalesce_usecs = rc->itr_setting;
+ ec->rx_coalesce_usecs_high = rc->rx_ring->q_vector->intrl;
+ break;
+ case ICE_TX_CONTAINER:
+ ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc);
+ ec->tx_coalesce_usecs = rc->itr_setting;
+ break;
+ default:
+ dev_dbg(ice_pf_to_dev(rc->rx_ring->vsi->back), "Invalid c_type %d\n", rc->type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_get_q_coalesce - get a queue's ITR/INTRL (coalesce) settings
+ * @vsi: VSI associated to the queue for getting ITR/INTRL (coalesce) settings
+ * @ec: ethtool structure to fill with the queue's coalesce settings
+ * @q_num: queue number/index to get the ITR/INTRL (coalesce) settings for
+ *
+ * Return 0 on success, and negative under the following conditions:
+ * 1. Getting Tx or Rx ITR/INTRL (coalesce) settings failed.
+ * 2. The q_num passed in is not a valid number/index for Tx and Rx rings.
+ */
+static int
+ice_get_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
+{
+ if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
+ if (ice_get_rc_coalesce(ec,
+ &vsi->rx_rings[q_num]->q_vector->rx))
+ return -EINVAL;
+ if (ice_get_rc_coalesce(ec,
+ &vsi->tx_rings[q_num]->q_vector->tx))
+ return -EINVAL;
+ } else if (q_num < vsi->num_rxq) {
+ if (ice_get_rc_coalesce(ec,
+ &vsi->rx_rings[q_num]->q_vector->rx))
+ return -EINVAL;
+ } else if (q_num < vsi->num_txq) {
+ if (ice_get_rc_coalesce(ec,
+ &vsi->tx_rings[q_num]->q_vector->tx))
+ return -EINVAL;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * __ice_get_coalesce - get ITR/INTRL values for the device
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @q_num: queue number to get the coalesce settings for
+ *
+ * If the caller passes in a negative q_num then we return coalesce settings
+ * based on queue number 0, else use the actual q_num passed in.
+ */
+static int
+__ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
+ int q_num)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (q_num < 0)
+ q_num = 0;
+
+ if (ice_get_q_coalesce(vsi, ec, q_num))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ice_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ return __ice_get_coalesce(netdev, ec, -1);
+}
+
+static int
+ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
+ struct ethtool_coalesce *ec)
+{
+ return __ice_get_coalesce(netdev, ec, q_num);
+}
+
+/**
+ * ice_set_rc_coalesce - set ITR values for specific ring container
+ * @ec: ethtool structure from user to update ITR settings
+ * @rc: ring container whose ITR values will be updated
+ * @vsi: VSI associated to the ring container
+ *
+ * Set specific ITR values. This is done per ice_ring_container because each
+ * q_vector can have 1 or more rings and all of said ring(s) will have the same
+ * ITR values.
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int
+ice_set_rc_coalesce(struct ethtool_coalesce *ec,
+ struct ice_ring_container *rc, struct ice_vsi *vsi)
+{
+ const char *c_type_str = (rc->type == ICE_RX_CONTAINER) ? "rx" : "tx";
+ u32 use_adaptive_coalesce, coalesce_usecs;
+ struct ice_pf *pf = vsi->back;
+ u16 itr_setting;
+
+ if (!rc->rx_ring)
+ return -EINVAL;
+
+ switch (rc->type) {
+ case ICE_RX_CONTAINER:
+ {
+ struct ice_q_vector *q_vector = rc->rx_ring->q_vector;
+
+ if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL ||
+ (ec->rx_coalesce_usecs_high &&
+ ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) {
+ netdev_info(vsi->netdev, "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n",
+ c_type_str, pf->hw.intrl_gran,
+ ICE_MAX_INTRL);
+ return -EINVAL;
+ }
+ if (ec->rx_coalesce_usecs_high != q_vector->intrl &&
+ (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) {
+ netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n",
+ c_type_str);
+ return -EINVAL;
+ }
+ if (ec->rx_coalesce_usecs_high != q_vector->intrl)
+ q_vector->intrl = ec->rx_coalesce_usecs_high;
+
+ use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
+ coalesce_usecs = ec->rx_coalesce_usecs;
+
+ break;
+ }
+ case ICE_TX_CONTAINER:
+ use_adaptive_coalesce = ec->use_adaptive_tx_coalesce;
+ coalesce_usecs = ec->tx_coalesce_usecs;
+
+ break;
+ default:
+ dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n",
+ rc->type);
+ return -EINVAL;
+ }
+
+ itr_setting = rc->itr_setting;
+ if (coalesce_usecs != itr_setting && use_adaptive_coalesce) {
+ netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n",
+ c_type_str, c_type_str);
+ return -EINVAL;
+ }
+
+ if (coalesce_usecs > ICE_ITR_MAX) {
+ netdev_info(vsi->netdev, "Invalid value, %s-usecs range is 0-%d\n",
+ c_type_str, ICE_ITR_MAX);
+ return -EINVAL;
+ }
+
+ if (use_adaptive_coalesce) {
+ rc->itr_mode = ITR_DYNAMIC;
+ } else {
+ rc->itr_mode = ITR_STATIC;
+ /* store user facing value how it was set */
+ rc->itr_setting = coalesce_usecs;
+ /* write the change to the register */
+ ice_write_itr(rc, coalesce_usecs);
+ /* force writes to take effect immediately, the flush shouldn't
+ * be done in the functions above because the intent is for
+ * them to do lazy writes.
+ */
+ ice_flush(&pf->hw);
+ }
+
+ return 0;
+}
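
For reference, a hypothetical request that passes the validation above -- roughly `ethtool -C <dev> adaptive-rx off rx-usecs 64 rx-usecs-high 200`, with 200 assumed to lie within [intrl_gran, ICE_MAX_INTRL]:

struct ethtool_coalesce ec = {
	.use_adaptive_rx_coalesce = 0,	/* static moderation */
	.rx_coalesce_usecs = 64,	/* per-ring Rx ITR, in usecs */
	.rx_coalesce_usecs_high = 200,	/* per-vector interrupt rate limit */
};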
+
+/**
+ * ice_set_q_coalesce - set a queue's ITR/INTRL (coalesce) settings
+ * @vsi: VSI associated to the queue that needs updating
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ *
+ * Return 0 on success, and negative under the following conditions:
+ * 1. Setting Tx or Rx ITR/INTRL (coalesce) settings failed.
+ * 2. The q_num passed in is not a valid number/index for Tx and Rx rings.
+ */
+static int
+ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
+{
+ if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
+ if (ice_set_rc_coalesce(ec,
+ &vsi->rx_rings[q_num]->q_vector->rx,
+ vsi))
+ return -EINVAL;
+
+ if (ice_set_rc_coalesce(ec,
+ &vsi->tx_rings[q_num]->q_vector->tx,
+ vsi))
+ return -EINVAL;
+ } else if (q_num < vsi->num_rxq) {
+ if (ice_set_rc_coalesce(ec,
+ &vsi->rx_rings[q_num]->q_vector->rx,
+ vsi))
+ return -EINVAL;
+ } else if (q_num < vsi->num_txq) {
+ if (ice_set_rc_coalesce(ec,
+ &vsi->tx_rings[q_num]->q_vector->tx,
+ vsi))
+ return -EINVAL;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_print_if_odd_usecs - print message if user tries to set odd [tx|rx]-usecs
+ * @netdev: netdev used for print
+ * @itr_setting: previous user setting
+ * @use_adaptive_coalesce: if adaptive coalesce is enabled or being enabled
+ * @coalesce_usecs: requested value of [tx|rx]-usecs
+ * @c_type_str: either "rx" or "tx" to match user set field of [tx|rx]-usecs
+ */
+static void
+ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting,
+ u32 use_adaptive_coalesce, u32 coalesce_usecs,
+ const char *c_type_str)
+{
+ if (use_adaptive_coalesce)
+ return;
+
+ if (itr_setting != coalesce_usecs && (coalesce_usecs % 2))
+ netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n",
+ c_type_str, coalesce_usecs, c_type_str,
+ ITR_REG_ALIGN(coalesce_usecs));
+}
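
The rounding the message warns about, in numbers (assuming ITR_REG_ALIGN() clears the low bit to honor the hardware's 2-usec ITR granularity):

/* ITR_REG_ALIGN(25) == 24 -> "Rounding down ... to 24" is printed;
 * ITR_REG_ALIGN(24) == 24 -> already even, nothing is printed.
 */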
+
+/**
+ * __ice_set_coalesce - set ITR/INTRL values for the device
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: ethtool structure containing the user's coalesce settings
+ * @q_num: queue number to set the coalesce settings for
+ *
+ * If the caller passes in a negative q_num then we set the coalesce settings
+ * for all Tx/Rx queues, else use the actual q_num passed in.
+ */
+static int
+__ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
+ int q_num)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (q_num < 0) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[0];
+ int v_idx;
+
+ if (q_vector) {
+ ice_print_if_odd_usecs(netdev, q_vector->rx.itr_setting,
+ ec->use_adaptive_rx_coalesce,
+ ec->rx_coalesce_usecs, "rx");
+
+ ice_print_if_odd_usecs(netdev, q_vector->tx.itr_setting,
+ ec->use_adaptive_tx_coalesce,
+ ec->tx_coalesce_usecs, "tx");
+ }
+
+ ice_for_each_q_vector(vsi, v_idx) {
+			/* In some cases, if DCB is configured, num_[rx|tx]q
+			 * can be less than vsi->num_q_vectors. This check
+			 * accounts for that so we don't report a false failure
+ */
+ if (v_idx >= vsi->num_rxq && v_idx >= vsi->num_txq)
+ goto set_complete;
+
+ if (ice_set_q_coalesce(vsi, ec, v_idx))
+ return -EINVAL;
+
+ ice_set_q_vector_intrl(vsi->q_vectors[v_idx]);
+ }
+ goto set_complete;
+ }
+
+ if (ice_set_q_coalesce(vsi, ec, q_num))
+ return -EINVAL;
+
+ ice_set_q_vector_intrl(vsi->q_vectors[q_num]);
+
+set_complete:
+ return 0;
+}
+
+static int ice_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ return __ice_set_coalesce(netdev, ec, -1);
+}
+
+static int
+ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
+ struct ethtool_coalesce *ec)
+{
+ return __ice_set_coalesce(netdev, ec, q_num);
+}
+
+static void
+ice_repr_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct ice_repr *repr = ice_netdev_to_repr(netdev);
+
+ if (ice_check_vf_ready_for_cfg(repr->vf))
+ return;
+
+ __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi);
+}
+
+static void
+ice_repr_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ struct ice_repr *repr = ice_netdev_to_repr(netdev);
+
+ /* for port representors only ETH_SS_STATS is supported */
+ if (ice_check_vf_ready_for_cfg(repr->vf) ||
+ stringset != ETH_SS_STATS)
+ return;
+
+ __ice_get_strings(netdev, stringset, data, repr->src_vsi);
+}
+
+static void
+ice_repr_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats __always_unused *stats,
+ u64 *data)
+{
+ struct ice_repr *repr = ice_netdev_to_repr(netdev);
+
+ if (ice_check_vf_ready_for_cfg(repr->vf))
+ return;
+
+ __ice_get_ethtool_stats(netdev, stats, data, repr->src_vsi);
+}
+
+static int ice_repr_get_sset_count(struct net_device *netdev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ICE_VSI_STATS_LEN;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+#define ICE_I2C_EEPROM_DEV_ADDR 0xA0
+#define ICE_I2C_EEPROM_DEV_ADDR2 0xA2
+#define ICE_MODULE_TYPE_SFP 0x03
+#define ICE_MODULE_TYPE_QSFP_PLUS 0x0D
+#define ICE_MODULE_TYPE_QSFP28 0x11
+#define ICE_MODULE_SFF_ADDR_MODE 0x04
+#define ICE_MODULE_SFF_DIAG_CAPAB 0x40
+#define ICE_MODULE_REVISION_ADDR 0x01
+#define ICE_MODULE_SFF_8472_COMP 0x5E
+#define ICE_MODULE_SFF_8472_SWAP 0x5C
+#define ICE_MODULE_QSFP_MAX_LEN 640
+
+/**
+ * ice_get_module_info - get SFF module type and revision information
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ */
+static int
+ice_get_module_info(struct net_device *netdev,
+ struct ethtool_modinfo *modinfo)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u8 sff8472_comp = 0;
+ u8 sff8472_swap = 0;
+ u8 sff8636_rev = 0;
+ u8 value = 0;
+ int status;
+
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00,
+ 0, &value, 1, 0, NULL);
+ if (status)
+ return status;
+
+ switch (value) {
+ case ICE_MODULE_TYPE_SFP:
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+ ICE_MODULE_SFF_8472_COMP, 0x00, 0,
+ &sff8472_comp, 1, 0, NULL);
+ if (status)
+ return status;
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+ ICE_MODULE_SFF_8472_SWAP, 0x00, 0,
+ &sff8472_swap, 1, 0, NULL);
+ if (status)
+ return status;
+
+ if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else if (sff8472_comp &&
+ (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) {
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ }
+ break;
+ case ICE_MODULE_TYPE_QSFP_PLUS:
+ case ICE_MODULE_TYPE_QSFP28:
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+ ICE_MODULE_REVISION_ADDR, 0x00, 0,
+ &sff8636_rev, 1, 0, NULL);
+ if (status)
+ return status;
+ /* Check revision compliance */
+ if (sff8636_rev > 0x02) {
+ /* Module is SFF-8636 compliant */
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+ }
+ break;
+ default:
+ netdev_warn(netdev, "SFF Module Type not recognized.\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * ice_get_module_eeprom - fill buffer with SFF EEPROM contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ */
+static int
+ice_get_module_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *ee, u8 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+#define SFF_READ_BLOCK_SIZE 8
+ u8 value[SFF_READ_BLOCK_SIZE] = { 0 };
+ u8 addr = ICE_I2C_EEPROM_DEV_ADDR;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ bool is_sfp = false;
+ unsigned int i, j;
+ u16 offset = 0;
+ u8 page = 0;
+ int status;
+
+ if (!ee || !ee->len || !data)
+ return -EINVAL;
+
+ status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, value, 1, 0,
+ NULL);
+ if (status)
+ return status;
+
+ if (value[0] == ICE_MODULE_TYPE_SFP)
+ is_sfp = true;
+
+ memset(data, 0, ee->len);
+ for (i = 0; i < ee->len; i += SFF_READ_BLOCK_SIZE) {
+ offset = i + ee->offset;
+ page = 0;
+
+ /* Check if we need to access the other memory page */
+ if (is_sfp) {
+ if (offset >= ETH_MODULE_SFF_8079_LEN) {
+ offset -= ETH_MODULE_SFF_8079_LEN;
+ addr = ICE_I2C_EEPROM_DEV_ADDR2;
+ }
+ } else {
+ while (offset >= ETH_MODULE_SFF_8436_LEN) {
+ /* Compute memory page number and offset. */
+ offset -= ETH_MODULE_SFF_8436_LEN / 2;
+ page++;
+ }
+ }
+
+ /* Bit 2 of EEPROM address 0x02 declares upper
+ * pages are disabled on QSFP modules.
+ * SFP modules only ever use page 0.
+ */
+ if (page == 0 || !(data[0x2] & 0x4)) {
+ u32 copy_len;
+
+			/* If the I2C bus is busy due to a slow page change or
+			 * link management access, the call can fail. This is
+			 * normal, so we retry a few times.
+ */
+ for (j = 0; j < 4; j++) {
+ status = ice_aq_sff_eeprom(hw, 0, addr, offset, page,
+ !is_sfp, value,
+ SFF_READ_BLOCK_SIZE,
+ 0, NULL);
+ netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%X)\n",
+ addr, offset, page, is_sfp,
+ value[0], value[1], value[2], value[3],
+ value[4], value[5], value[6], value[7],
+ status);
+ if (status) {
+ usleep_range(1500, 2500);
+ memset(value, 0, SFF_READ_BLOCK_SIZE);
+ continue;
+ }
+ break;
+ }
+
+ /* Make sure we have enough room for the new block */
+ copy_len = min_t(u32, SFF_READ_BLOCK_SIZE, ee->len - i);
+ memcpy(data + i, value, copy_len);
+ }
+ }
+ return 0;
+}
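
A worked example of the paging math above, using the uapi lengths (ETH_MODULE_SFF_8079_LEN and ETH_MODULE_SFF_8436_LEN are both 256):

/* QSFP: upper pages are all mapped at device offsets 128..255, so a
 * flat EEPROM offset of 300 becomes
 *
 *	300 >= 256 -> offset = 300 - 128 == 172, page = 1
 *
 * i.e. byte 172 of upper page 1. An SFP switches I2C address instead:
 * flat offset 300 reads from 0xA2 (ICE_I2C_EEPROM_DEV_ADDR2) at
 * offset 300 - 256 == 44.
 */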
+
+static const struct ethtool_ops ice_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_USE_ADAPTIVE |
+ ETHTOOL_COALESCE_RX_USECS_HIGH,
+ .get_link_ksettings = ice_get_link_ksettings,
+ .set_link_ksettings = ice_set_link_ksettings,
+ .get_drvinfo = ice_get_drvinfo,
+ .get_regs_len = ice_get_regs_len,
+ .get_regs = ice_get_regs,
+ .get_wol = ice_get_wol,
+ .set_wol = ice_set_wol,
+ .get_msglevel = ice_get_msglevel,
+ .set_msglevel = ice_set_msglevel,
+ .self_test = ice_self_test,
+ .get_link = ethtool_op_get_link,
+ .get_eeprom_len = ice_get_eeprom_len,
+ .get_eeprom = ice_get_eeprom,
+ .get_coalesce = ice_get_coalesce,
+ .set_coalesce = ice_set_coalesce,
+ .get_strings = ice_get_strings,
+ .set_phys_id = ice_set_phys_id,
+ .get_ethtool_stats = ice_get_ethtool_stats,
+ .get_priv_flags = ice_get_priv_flags,
+ .set_priv_flags = ice_set_priv_flags,
+ .get_sset_count = ice_get_sset_count,
+ .get_rxnfc = ice_get_rxnfc,
+ .set_rxnfc = ice_set_rxnfc,
+ .get_ringparam = ice_get_ringparam,
+ .set_ringparam = ice_set_ringparam,
+ .nway_reset = ice_nway_reset,
+ .get_pauseparam = ice_get_pauseparam,
+ .set_pauseparam = ice_set_pauseparam,
+ .get_rxfh_key_size = ice_get_rxfh_key_size,
+ .get_rxfh_indir_size = ice_get_rxfh_indir_size,
+ .get_rxfh_context = ice_get_rxfh_context,
+ .get_rxfh = ice_get_rxfh,
+ .set_rxfh = ice_set_rxfh,
+ .get_channels = ice_get_channels,
+ .set_channels = ice_set_channels,
+ .get_ts_info = ice_get_ts_info,
+ .get_per_queue_coalesce = ice_get_per_q_coalesce,
+ .set_per_queue_coalesce = ice_set_per_q_coalesce,
+ .get_fecparam = ice_get_fecparam,
+ .set_fecparam = ice_set_fecparam,
+ .get_module_info = ice_get_module_info,
+ .get_module_eeprom = ice_get_module_eeprom,
+};
+
+static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
+ .get_link_ksettings = ice_get_link_ksettings,
+ .set_link_ksettings = ice_set_link_ksettings,
+ .get_drvinfo = ice_get_drvinfo,
+ .get_regs_len = ice_get_regs_len,
+ .get_regs = ice_get_regs,
+ .get_wol = ice_get_wol,
+ .set_wol = ice_set_wol,
+ .get_msglevel = ice_get_msglevel,
+ .set_msglevel = ice_set_msglevel,
+ .get_link = ethtool_op_get_link,
+ .get_eeprom_len = ice_get_eeprom_len,
+ .get_eeprom = ice_get_eeprom,
+ .get_strings = ice_get_strings,
+ .get_ethtool_stats = ice_get_ethtool_stats,
+ .get_sset_count = ice_get_sset_count,
+ .get_ringparam = ice_get_ringparam,
+ .set_ringparam = ice_set_ringparam,
+ .nway_reset = ice_nway_reset,
+ .get_channels = ice_get_channels,
+};
+
+/**
+ * ice_set_ethtool_safe_mode_ops - setup safe mode ethtool ops
+ * @netdev: network interface device structure
+ */
+void ice_set_ethtool_safe_mode_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &ice_ethtool_safe_mode_ops;
+}
+
+static const struct ethtool_ops ice_ethtool_repr_ops = {
+ .get_drvinfo = ice_repr_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = ice_repr_get_strings,
+ .get_ethtool_stats = ice_repr_get_ethtool_stats,
+ .get_sset_count = ice_repr_get_sset_count,
+};
+
+/**
+ * ice_set_ethtool_repr_ops - setup VF's port representor ethtool ops
+ * @netdev: network interface device structure
+ */
+void ice_set_ethtool_repr_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &ice_ethtool_repr_ops;
+}
+
+/**
+ * ice_set_ethtool_ops - setup netdev ethtool ops
+ * @netdev: network interface device structure
+ *
+ * setup netdev ethtool ops with ice specific ops
+ */
+void ice_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &ice_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
new file mode 100644
index 000000000..8c6e13f87
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -0,0 +1,1939 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+/* flow director ethtool support for ice */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_fdir.h"
+#include "ice_flow.h"
+
+static struct in6_addr full_ipv6_addr_mask = {
+ .in6_u = {
+ .u6_addr8 = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ }
+ }
+};
+
+static struct in6_addr zero_ipv6_addr_mask = {
+ .in6_u = {
+ .u6_addr8 = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ }
+ }
+};
+
+/* calls to ice_flow_add_prof require the number of segments in the array
+ * for segs_cnt. In this code that is one more than the index.
+ */
+#define TNL_SEG_CNT(_TNL_) ((_TNL_) + 1)
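+/* For example, TNL_SEG_CNT(ICE_FD_HW_SEG_NON_TUN) is 1 and
+ * TNL_SEG_CNT(ICE_FD_HW_SEG_TUN) is 2: tunneled profiles carry an outer
+ * and an inner segment, while non-tunneled profiles carry a single segment.
+ */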
+
+/**
+ * ice_fltr_to_ethtool_flow - convert filter type values to ethtool
+ * flow type values
+ * @flow: filter type to be converted
+ *
+ * Returns the corresponding ethtool flow type.
+ */
+static int ice_fltr_to_ethtool_flow(enum ice_fltr_ptype flow)
+{
+ switch (flow) {
+ case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+ return TCP_V4_FLOW;
+ case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+ return UDP_V4_FLOW;
+ case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+ return SCTP_V4_FLOW;
+ case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+ return IPV4_USER_FLOW;
+ case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+ return TCP_V6_FLOW;
+ case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+ return UDP_V6_FLOW;
+ case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+ return SCTP_V6_FLOW;
+ case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+ return IPV6_USER_FLOW;
+ default:
+ /* 0 is undefined ethtool flow */
+ return 0;
+ }
+}
+
+/**
+ * ice_ethtool_flow_to_fltr - convert ethtool flow type to filter enum
+ * @eth: Ethtool flow type to be converted
+ *
+ * Returns the corresponding ice filter ptype enum.
+ */
+static enum ice_fltr_ptype ice_ethtool_flow_to_fltr(int eth)
+{
+ switch (eth) {
+ case TCP_V4_FLOW:
+ return ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+ case UDP_V4_FLOW:
+ return ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+ case SCTP_V4_FLOW:
+ return ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+ case IPV4_USER_FLOW:
+ return ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+ case TCP_V6_FLOW:
+ return ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+ case UDP_V6_FLOW:
+ return ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+ case SCTP_V6_FLOW:
+ return ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+ case IPV6_USER_FLOW:
+ return ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+ default:
+ return ICE_FLTR_PTYPE_NONF_NONE;
+ }
+}
+
+/**
+ * ice_is_mask_valid - check whether a mask fully covers a field
+ * @mask: full mask to check
+ * @field: field for which mask should be valid
+ *
+ * Returns true if every bit of @field is set in @mask, false otherwise.
+ */
+static bool ice_is_mask_valid(u64 mask, u64 field)
+{
+ return (mask & field) == field;
+}
+
+/**
+ * ice_get_ethtool_fdir_entry - fill ethtool structure with fdir filter data
+ * @hw: hardware structure that contains filter list
+ * @cmd: ethtool command data structure to receive the filter data
+ *
+ * Returns 0 on success and -EINVAL on failure
+ */
+int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp;
+ struct ice_fdir_fltr *rule;
+ int ret = 0;
+ u16 idx;
+
+ fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+ mutex_lock(&hw->fdir_fltr_lock);
+
+ rule = ice_fdir_find_fltr_by_idx(hw, fsp->location);
+
+ if (!rule || fsp->location != rule->fltr_id) {
+ ret = -EINVAL;
+ goto release_lock;
+ }
+
+ fsp->flow_type = ice_fltr_to_ethtool_flow(rule->flow_type);
+
+ memset(&fsp->m_u, 0, sizeof(fsp->m_u));
+ memset(&fsp->m_ext, 0, sizeof(fsp->m_ext));
+
+ switch (fsp->flow_type) {
+ case IPV4_USER_FLOW:
+ fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+ fsp->h_u.usr_ip4_spec.proto = 0;
+ fsp->h_u.usr_ip4_spec.l4_4_bytes = rule->ip.v4.l4_header;
+ fsp->h_u.usr_ip4_spec.tos = rule->ip.v4.tos;
+ fsp->h_u.usr_ip4_spec.ip4src = rule->ip.v4.src_ip;
+ fsp->h_u.usr_ip4_spec.ip4dst = rule->ip.v4.dst_ip;
+ fsp->m_u.usr_ip4_spec.ip4src = rule->mask.v4.src_ip;
+ fsp->m_u.usr_ip4_spec.ip4dst = rule->mask.v4.dst_ip;
+ fsp->m_u.usr_ip4_spec.ip_ver = 0xFF;
+ fsp->m_u.usr_ip4_spec.proto = 0;
+ fsp->m_u.usr_ip4_spec.l4_4_bytes = rule->mask.v4.l4_header;
+ fsp->m_u.usr_ip4_spec.tos = rule->mask.v4.tos;
+ break;
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ fsp->h_u.tcp_ip4_spec.psrc = rule->ip.v4.src_port;
+ fsp->h_u.tcp_ip4_spec.pdst = rule->ip.v4.dst_port;
+ fsp->h_u.tcp_ip4_spec.ip4src = rule->ip.v4.src_ip;
+ fsp->h_u.tcp_ip4_spec.ip4dst = rule->ip.v4.dst_ip;
+ fsp->m_u.tcp_ip4_spec.psrc = rule->mask.v4.src_port;
+ fsp->m_u.tcp_ip4_spec.pdst = rule->mask.v4.dst_port;
+ fsp->m_u.tcp_ip4_spec.ip4src = rule->mask.v4.src_ip;
+ fsp->m_u.tcp_ip4_spec.ip4dst = rule->mask.v4.dst_ip;
+ break;
+ case IPV6_USER_FLOW:
+ fsp->h_u.usr_ip6_spec.l4_4_bytes = rule->ip.v6.l4_header;
+ fsp->h_u.usr_ip6_spec.tclass = rule->ip.v6.tc;
+ fsp->h_u.usr_ip6_spec.l4_proto = rule->ip.v6.proto;
+ memcpy(fsp->h_u.tcp_ip6_spec.ip6src, rule->ip.v6.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, rule->ip.v6.dst_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->m_u.tcp_ip6_spec.ip6src, rule->mask.v6.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->m_u.tcp_ip6_spec.ip6dst, rule->mask.v6.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->m_u.usr_ip6_spec.l4_4_bytes = rule->mask.v6.l4_header;
+ fsp->m_u.usr_ip6_spec.tclass = rule->mask.v6.tc;
+ fsp->m_u.usr_ip6_spec.l4_proto = rule->mask.v6.proto;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ memcpy(fsp->h_u.tcp_ip6_spec.ip6src, rule->ip.v6.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, rule->ip.v6.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->h_u.tcp_ip6_spec.psrc = rule->ip.v6.src_port;
+ fsp->h_u.tcp_ip6_spec.pdst = rule->ip.v6.dst_port;
+ memcpy(fsp->m_u.tcp_ip6_spec.ip6src,
+ rule->mask.v6.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->m_u.tcp_ip6_spec.ip6dst,
+ rule->mask.v6.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->m_u.tcp_ip6_spec.psrc = rule->mask.v6.src_port;
+ fsp->m_u.tcp_ip6_spec.pdst = rule->mask.v6.dst_port;
+ fsp->h_u.tcp_ip6_spec.tclass = rule->ip.v6.tc;
+ fsp->m_u.tcp_ip6_spec.tclass = rule->mask.v6.tc;
+ break;
+ default:
+ break;
+ }
+
+ if (rule->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT)
+ fsp->ring_cookie = RX_CLS_FLOW_DISC;
+ else
+ fsp->ring_cookie = rule->orig_q_index;
+
+ idx = ice_ethtool_flow_to_fltr(fsp->flow_type);
+ if (idx == ICE_FLTR_PTYPE_NONF_NONE) {
+ dev_err(ice_hw_to_dev(hw), "Missing input index for flow_type %d\n",
+ rule->flow_type);
+ ret = -EINVAL;
+ }
+
+release_lock:
+ mutex_unlock(&hw->fdir_fltr_lock);
+ return ret;
+}
+
+/**
+ * ice_get_fdir_fltr_ids - fill buffer with filter IDs of active filters
+ * @hw: hardware structure containing the filter list
+ * @cmd: ethtool command data structure
+ * @rule_locs: ethtool array passed in from OS to receive filter IDs
+ *
+ * Returns 0 on success, as expected by ethtool
+ */
+int
+ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ struct ice_fdir_fltr *f_rule;
+ unsigned int cnt = 0;
+ int val = 0;
+
+ /* report total rule count */
+ cmd->data = ice_get_fdir_cnt_all(hw);
+
+ mutex_lock(&hw->fdir_fltr_lock);
+
+ list_for_each_entry(f_rule, &hw->fdir_list_head, fltr_node) {
+ if (cnt == cmd->rule_cnt) {
+ val = -EMSGSIZE;
+ goto release_lock;
+ }
+ rule_locs[cnt] = f_rule->fltr_id;
+ cnt++;
+ }
+
+release_lock:
+ mutex_unlock(&hw->fdir_fltr_lock);
+ if (!val)
+ cmd->rule_cnt = cnt;
+ return val;
+}
+
+/**
+ * ice_fdir_remap_entries - update the FDir entries in profile
+ * @prof: FDir structure pointer
+ * @tun: segment index (tunneled or non-tunneled packet)
+ * @idx: FDir entry index
+ */
+static void
+ice_fdir_remap_entries(struct ice_fd_hw_prof *prof, int tun, int idx)
+{
+ if (idx != prof->cnt && tun < ICE_FD_HW_SEG_MAX) {
+ int i;
+
+ for (i = idx; i < (prof->cnt - 1); i++) {
+ u64 old_entry_h;
+
+ old_entry_h = prof->entry_h[i + 1][tun];
+ prof->entry_h[i][tun] = old_entry_h;
+ prof->vsi_h[i] = prof->vsi_h[i + 1];
+ }
+
+ prof->entry_h[i][tun] = 0;
+ prof->vsi_h[i] = 0;
+ }
+}
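+
+/* Illustrative example (not driver code): with prof->cnt == 3 and
+ * vsi_h == {A, B, C}, removing index 1 shifts the tail down so the
+ * tables become {A, C, 0}; the caller is expected to decrement
+ * prof->cnt afterwards, as ice_fdir_rem_adq_chnl() does below.
+ */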
+
+/**
+ * ice_fdir_rem_adq_chnl - remove an ADQ channel from HW filter rules
+ * @hw: hardware structure containing filter list
+ * @vsi_idx: VSI handle
+ */
+void ice_fdir_rem_adq_chnl(struct ice_hw *hw, u16 vsi_idx)
+{
+ int status, flow;
+
+ if (!hw->fdir_prof)
+ return;
+
+ for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
+ struct ice_fd_hw_prof *prof = hw->fdir_prof[flow];
+ int tun, i;
+
+ if (!prof || !prof->cnt)
+ continue;
+
+ for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+ u64 prof_id;
+
+ prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+
+ for (i = 0; i < prof->cnt; i++) {
+ if (prof->vsi_h[i] != vsi_idx)
+ continue;
+
+ prof->entry_h[i][tun] = 0;
+ prof->vsi_h[i] = 0;
+ break;
+ }
+
+			/* after clearing FDir entries, remap the remaining ones */
+ ice_fdir_remap_entries(prof, tun, i);
+
+ /* find flow profile corresponding to prof_id and clear
+ * vsi_idx from bitmap.
+ */
+ status = ice_flow_rem_vsi_prof(hw, vsi_idx, prof_id);
+ if (status) {
+ dev_err(ice_hw_to_dev(hw), "ice_flow_rem_vsi_prof() failed status=%d\n",
+ status);
+ }
+ }
+ prof->cnt--;
+ }
+}
+
+/**
+ * ice_fdir_get_hw_prof - return the ice_fd_hw_prof associated with a flow
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow: FDir flow type to look up
+ */
+static struct ice_fd_hw_prof *
+ice_fdir_get_hw_prof(struct ice_hw *hw, enum ice_block blk, int flow)
+{
+ if (blk == ICE_BLK_FD && hw->fdir_prof)
+ return hw->fdir_prof[flow];
+
+ return NULL;
+}
+
+/**
+ * ice_fdir_erase_flow_from_hw - remove a flow from the HW profile tables
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow: FDir flow type to release
+ */
+static void
+ice_fdir_erase_flow_from_hw(struct ice_hw *hw, enum ice_block blk, int flow)
+{
+ struct ice_fd_hw_prof *prof = ice_fdir_get_hw_prof(hw, blk, flow);
+ int tun;
+
+ if (!prof)
+ return;
+
+ for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+ u64 prof_id;
+ int j;
+
+ prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+ for (j = 0; j < prof->cnt; j++) {
+ u16 vsi_num;
+
+ if (!prof->entry_h[j][tun] || !prof->vsi_h[j])
+ continue;
+ vsi_num = ice_get_hw_vsi_num(hw, prof->vsi_h[j]);
+ ice_rem_prof_id_flow(hw, blk, vsi_num, prof_id);
+ ice_flow_rem_entry(hw, blk, prof->entry_h[j][tun]);
+ prof->entry_h[j][tun] = 0;
+ }
+ ice_flow_rem_prof(hw, blk, prof_id);
+ }
+}
+
+/**
+ * ice_fdir_rem_flow - release the ice_flow structures for a filter type
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow_type: FDir flow type to release
+ */
+static void
+ice_fdir_rem_flow(struct ice_hw *hw, enum ice_block blk,
+ enum ice_fltr_ptype flow_type)
+{
+ int flow = (int)flow_type & ~FLOW_EXT;
+ struct ice_fd_hw_prof *prof;
+ int tun, i;
+
+ prof = ice_fdir_get_hw_prof(hw, blk, flow);
+ if (!prof)
+ return;
+
+ ice_fdir_erase_flow_from_hw(hw, blk, flow);
+ for (i = 0; i < prof->cnt; i++)
+ prof->vsi_h[i] = 0;
+ for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+ if (!prof->fdir_seg[tun])
+ continue;
+ devm_kfree(ice_hw_to_dev(hw), prof->fdir_seg[tun]);
+ prof->fdir_seg[tun] = NULL;
+ }
+ prof->cnt = 0;
+}
+
+/**
+ * ice_fdir_release_flows - release all flows in use for later replay
+ * @hw: pointer to HW instance
+ */
+void ice_fdir_release_flows(struct ice_hw *hw)
+{
+ int flow;
+
+ /* release Flow Director HW table entries */
+ for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++)
+ ice_fdir_erase_flow_from_hw(hw, ICE_BLK_FD, flow);
+}
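+
+/* Reset flow (summary of the helpers around this point): the release above
+ * drops the HW table entries but keeps the software fdir_seg descriptions,
+ * so that ice_fdir_replay_flows() below can rebuild the same profiles and
+ * entries once the reset completes.
+ */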
+
+/**
+ * ice_fdir_replay_flows - replay HW Flow Director filter info
+ * @hw: pointer to HW instance
+ */
+void ice_fdir_replay_flows(struct ice_hw *hw)
+{
+ int flow;
+
+ for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
+ int tun;
+
+ if (!hw->fdir_prof[flow] || !hw->fdir_prof[flow]->cnt)
+ continue;
+ for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+ struct ice_flow_prof *hw_prof;
+ struct ice_fd_hw_prof *prof;
+ u64 prof_id;
+ int j;
+
+ prof = hw->fdir_prof[flow];
+ prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+ ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id,
+ prof->fdir_seg[tun], TNL_SEG_CNT(tun),
+ &hw_prof);
+ for (j = 0; j < prof->cnt; j++) {
+ enum ice_flow_priority prio;
+ u64 entry_h = 0;
+ int err;
+
+ prio = ICE_FLOW_PRIO_NORMAL;
+ err = ice_flow_add_entry(hw, ICE_BLK_FD,
+ prof_id,
+ prof->vsi_h[0],
+ prof->vsi_h[j],
+ prio, prof->fdir_seg,
+ &entry_h);
+ if (err) {
+ dev_err(ice_hw_to_dev(hw), "Could not replay Flow Director, flow type %d\n",
+ flow);
+ continue;
+ }
+ prof->entry_h[j][tun] = entry_h;
+ }
+ }
+ }
+}
+
+/**
+ * ice_parse_rx_flow_user_data - deconstruct user-defined data
+ * @fsp: pointer to ethtool Rx flow specification
+ * @data: pointer to userdef data structure for storage
+ *
+ * Returns 0 on success, negative error value on failure
+ */
+static int
+ice_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+ struct ice_rx_flow_userdef *data)
+{
+ u64 value, mask;
+
+ memset(data, 0, sizeof(*data));
+ if (!(fsp->flow_type & FLOW_EXT))
+ return 0;
+
+ value = be64_to_cpu(*((__force __be64 *)fsp->h_ext.data));
+ mask = be64_to_cpu(*((__force __be64 *)fsp->m_ext.data));
+ if (!mask)
+ return 0;
+
+#define ICE_USERDEF_FLEX_WORD_M GENMASK_ULL(15, 0)
+#define ICE_USERDEF_FLEX_OFFS_S 16
+#define ICE_USERDEF_FLEX_OFFS_M GENMASK_ULL(31, ICE_USERDEF_FLEX_OFFS_S)
+#define ICE_USERDEF_FLEX_FLTR_M GENMASK_ULL(31, 0)
+
+ /* 0x1fe is the maximum value for offsets stored in the internal
+ * filtering tables.
+ */
+#define ICE_USERDEF_FLEX_MAX_OFFS_VAL 0x1fe
+
+ if (!ice_is_mask_valid(mask, ICE_USERDEF_FLEX_FLTR_M) ||
+ value > ICE_USERDEF_FLEX_FLTR_M)
+ return -EINVAL;
+
+ data->flex_word = value & ICE_USERDEF_FLEX_WORD_M;
+ data->flex_offset = (value & ICE_USERDEF_FLEX_OFFS_M) >>
+ ICE_USERDEF_FLEX_OFFS_S;
+ if (data->flex_offset > ICE_USERDEF_FLEX_MAX_OFFS_VAL)
+ return -EINVAL;
+
+ data->flex_fltr = true;
+
+ return 0;
+}
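+
+/* Worked example (illustrative values): a user-def value of 0x004E1234
+ * with the low 32 mask bits fully set decodes, per the masks above, to
+ * flex_offset = 0x004E and flex_word = 0x1234, i.e. match the 16-bit
+ * word 0x1234 at byte offset 0x4E. From userspace this could be
+ * requested with something like:
+ *
+ *	ethtool -N eth0 flow-type tcp4 ... user-def 0x004e1234 action 2
+ */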
+
+/**
+ * ice_fdir_num_avail_fltr - return the number of unused flow director filters
+ * @hw: pointer to hardware structure
+ * @vsi: software VSI structure
+ *
+ * There are two filter pools: guaranteed and best effort (shared). Each VSI
+ * can use filters from either pool. The guaranteed pool is divided between
+ * VSIs, while the best effort pool is a device-wide resource common to all
+ * VSIs. The number of filters available to this VSI is the sum of the VSI's
+ * guaranteed filter pool and the globally available best effort filter pool.
+ *
+ * Returns the number of available flow director filters to this VSI
+ */
+static int ice_fdir_num_avail_fltr(struct ice_hw *hw, struct ice_vsi *vsi)
+{
+ u16 vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
+ u16 num_guar;
+ u16 num_be;
+
+ /* total guaranteed filters assigned to this VSI */
+ num_guar = vsi->num_gfltr;
+
+	/* minus the guaranteed filters programmed by this VSI */
+ num_guar -= (rd32(hw, VSIQF_FD_CNT(vsi_num)) &
+ VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S;
+
+ /* total global best effort filters */
+ num_be = hw->func_caps.fd_fltr_best_effort;
+
+ /* minus the global best effort filters programmed */
+ num_be -= (rd32(hw, GLQF_FD_CNT) & GLQF_FD_CNT_FD_BCNT_M) >>
+ GLQF_FD_CNT_FD_BCNT_S;
+
+ return num_guar + num_be;
+}
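+
+/* Worked example (illustrative values): with num_gfltr = 64, 10 guaranteed
+ * filters already programmed, 2048 best-effort filters in the device and
+ * 100 of them in use, this returns (64 - 10) + (2048 - 100) = 2002.
+ */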
+
+/**
+ * ice_fdir_alloc_flow_prof - allocate FDir flow profile structure(s)
+ * @hw: HW structure containing the FDir flow profile structure(s)
+ * @flow: flow type to allocate the flow profile for
+ *
+ * Allocate the fdir_prof and fdir_prof[flow] if not already created. Return 0
+ * on success and negative on error.
+ */
+static int
+ice_fdir_alloc_flow_prof(struct ice_hw *hw, enum ice_fltr_ptype flow)
+{
+ if (!hw)
+ return -EINVAL;
+
+ if (!hw->fdir_prof) {
+ hw->fdir_prof = devm_kcalloc(ice_hw_to_dev(hw),
+ ICE_FLTR_PTYPE_MAX,
+ sizeof(*hw->fdir_prof),
+ GFP_KERNEL);
+ if (!hw->fdir_prof)
+ return -ENOMEM;
+ }
+
+ if (!hw->fdir_prof[flow]) {
+ hw->fdir_prof[flow] = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(**hw->fdir_prof),
+ GFP_KERNEL);
+ if (!hw->fdir_prof[flow])
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_fdir_prof_vsi_idx - find or insert a vsi_idx in structure
+ * @prof: pointer to flow director HW profile
+ * @vsi_idx: vsi_idx to locate
+ *
+ * Return the index of vsi_idx. If vsi_idx is not found, insert it into the
+ * vsi_h table and return its new index.
+ */
+static u16
+ice_fdir_prof_vsi_idx(struct ice_fd_hw_prof *prof, int vsi_idx)
+{
+ u16 idx = 0;
+
+ for (idx = 0; idx < prof->cnt; idx++)
+ if (prof->vsi_h[idx] == vsi_idx)
+ return idx;
+
+ if (idx == prof->cnt)
+ prof->vsi_h[prof->cnt++] = vsi_idx;
+ return idx;
+}
+
+/**
+ * ice_fdir_set_hw_fltr_rule - Configure HW tables to generate a FDir rule
+ * @pf: pointer to the PF structure
+ * @seg: protocol header description pointer
+ * @flow: filter enum
+ * @tun: FDir segment to program
+ */
+static int
+ice_fdir_set_hw_fltr_rule(struct ice_pf *pf, struct ice_flow_seg_info *seg,
+ enum ice_fltr_ptype flow, enum ice_fd_hw_seg tun)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_vsi *main_vsi, *ctrl_vsi;
+ struct ice_flow_seg_info *old_seg;
+ struct ice_flow_prof *prof = NULL;
+ struct ice_fd_hw_prof *hw_prof;
+ struct ice_hw *hw = &pf->hw;
+ u64 entry1_h = 0;
+ u64 entry2_h = 0;
+ bool del_last;
+ u64 prof_id;
+ int err;
+ int idx;
+
+ main_vsi = ice_get_main_vsi(pf);
+ if (!main_vsi)
+ return -EINVAL;
+
+ ctrl_vsi = ice_get_ctrl_vsi(pf);
+ if (!ctrl_vsi)
+ return -EINVAL;
+
+ err = ice_fdir_alloc_flow_prof(hw, flow);
+ if (err)
+ return err;
+
+ hw_prof = hw->fdir_prof[flow];
+ old_seg = hw_prof->fdir_seg[tun];
+ if (old_seg) {
+		/* This flow_type already has a changed input set.
+		 * If it matches the requested input set we are done;
+		 * if it differs, that is an error.
+ */
+ if (!memcmp(old_seg, seg, sizeof(*seg)))
+ return -EEXIST;
+
+ /* if there are FDir filters using this flow,
+ * then return error.
+ */
+ if (hw->fdir_fltr_cnt[flow]) {
+ dev_err(dev, "Failed to add filter. Flow director filters on each port must have the same input set.\n");
+ return -EINVAL;
+ }
+
+ if (ice_is_arfs_using_perfect_flow(hw, flow)) {
+ dev_err(dev, "aRFS using perfect flow type %d, cannot change input set\n",
+ flow);
+ return -EINVAL;
+ }
+
+ /* remove HW filter definition */
+ ice_fdir_rem_flow(hw, ICE_BLK_FD, flow);
+ }
+
+	/* Add a profile for this flow. The profile is described by the
+	 * segment array alone; TNL_SEG_CNT() supplies the segment count
+	 * (one for non-tunneled, two for tunneled packets).
+	 */
+ prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+ err = ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id, seg,
+ TNL_SEG_CNT(tun), &prof);
+ if (err)
+ return err;
+ err = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, main_vsi->idx,
+ main_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+ seg, &entry1_h);
+ if (err)
+ goto err_prof;
+ err = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, main_vsi->idx,
+ ctrl_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+ seg, &entry2_h);
+ if (err)
+ goto err_entry;
+
+ hw_prof->fdir_seg[tun] = seg;
+ hw_prof->entry_h[0][tun] = entry1_h;
+ hw_prof->entry_h[1][tun] = entry2_h;
+ hw_prof->vsi_h[0] = main_vsi->idx;
+ hw_prof->vsi_h[1] = ctrl_vsi->idx;
+ if (!hw_prof->cnt)
+ hw_prof->cnt = 2;
+
+ for (idx = 1; idx < ICE_CHNL_MAX_TC; idx++) {
+ u16 vsi_idx;
+ u16 vsi_h;
+
+ if (!ice_is_adq_active(pf) || !main_vsi->tc_map_vsi[idx])
+ continue;
+
+ entry1_h = 0;
+ vsi_h = main_vsi->tc_map_vsi[idx]->idx;
+ err = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id,
+ main_vsi->idx, vsi_h,
+ ICE_FLOW_PRIO_NORMAL, seg,
+ &entry1_h);
+ if (err) {
+ dev_err(dev, "Could not add Channel VSI %d to flow group\n",
+ idx);
+ goto err_unroll;
+ }
+
+ vsi_idx = ice_fdir_prof_vsi_idx(hw_prof,
+ main_vsi->tc_map_vsi[idx]->idx);
+ hw_prof->entry_h[vsi_idx][tun] = entry1_h;
+ }
+
+ return 0;
+
+err_unroll:
+ entry1_h = 0;
+ hw_prof->fdir_seg[tun] = NULL;
+
+ /* The variable del_last will be used to determine when to clean up
+ * the VSI group data. The VSI data is not needed if there are no
+ * segments.
+ */
+ del_last = true;
+ for (idx = 0; idx < ICE_FD_HW_SEG_MAX; idx++)
+ if (hw_prof->fdir_seg[idx]) {
+ del_last = false;
+ break;
+ }
+
+ for (idx = 0; idx < hw_prof->cnt; idx++) {
+ u16 vsi_num = ice_get_hw_vsi_num(hw, hw_prof->vsi_h[idx]);
+
+ if (!hw_prof->entry_h[idx][tun])
+ continue;
+ ice_rem_prof_id_flow(hw, ICE_BLK_FD, vsi_num, prof_id);
+ ice_flow_rem_entry(hw, ICE_BLK_FD, hw_prof->entry_h[idx][tun]);
+ hw_prof->entry_h[idx][tun] = 0;
+ if (del_last)
+ hw_prof->vsi_h[idx] = 0;
+ }
+ if (del_last)
+ hw_prof->cnt = 0;
+err_entry:
+ ice_rem_prof_id_flow(hw, ICE_BLK_FD,
+ ice_get_hw_vsi_num(hw, main_vsi->idx), prof_id);
+ ice_flow_rem_entry(hw, ICE_BLK_FD, entry1_h);
+err_prof:
+ ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+ dev_err(dev, "Failed to add filter. Flow director filters on each port must have the same input set.\n");
+
+ return err;
+}
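+
+/* Resulting bookkeeping (from the assignments above): on success the
+ * profile tracks entry_h[0] for the main VSI, entry_h[1] for the control
+ * VSI and entry_h[2..] for any ADQ channel VSIs, with a separate column
+ * per segment type (non-tunnel vs. tunnel).
+ */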
+
+/**
+ * ice_set_init_fdir_seg - initialize a flow segment for the default rules
+ * @seg: flow segment for programming
+ * @l3_proto: ICE_FLOW_SEG_HDR_IPV4 or ICE_FLOW_SEG_HDR_IPV6
+ * @l4_proto: ICE_FLOW_SEG_HDR_TCP or ICE_FLOW_SEG_HDR_UDP
+ *
+ * Set the configuration for perfect filters to the provided flow segment for
+ * programming the HW filter. This is to be called only when initializing
+ * filters, as this function assumes no filters exist yet.
+ */
+static int
+ice_set_init_fdir_seg(struct ice_flow_seg_info *seg,
+ enum ice_flow_seg_hdr l3_proto,
+ enum ice_flow_seg_hdr l4_proto)
+{
+ enum ice_flow_field src_addr, dst_addr, src_port, dst_port;
+
+ if (!seg)
+ return -EINVAL;
+
+ if (l3_proto == ICE_FLOW_SEG_HDR_IPV4) {
+ src_addr = ICE_FLOW_FIELD_IDX_IPV4_SA;
+ dst_addr = ICE_FLOW_FIELD_IDX_IPV4_DA;
+ } else if (l3_proto == ICE_FLOW_SEG_HDR_IPV6) {
+ src_addr = ICE_FLOW_FIELD_IDX_IPV6_SA;
+ dst_addr = ICE_FLOW_FIELD_IDX_IPV6_DA;
+ } else {
+ return -EINVAL;
+ }
+
+ if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+ src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+ dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+ } else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+ src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+ dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+ } else {
+ return -EINVAL;
+ }
+
+ ICE_FLOW_SET_HDRS(seg, l3_proto | l4_proto);
+
+ /* IP source address */
+ ice_flow_set_fld(seg, src_addr, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+ /* IP destination address */
+ ice_flow_set_fld(seg, dst_addr, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+ /* Layer 4 source port */
+ ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+ /* Layer 4 destination port */
+ ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+ return 0;
+}
+
+/**
+ * ice_create_init_fdir_rule - create a default perfect filter rule
+ * @pf: PF structure
+ * @flow: filter enum
+ *
+ * Returns 0 on success or a negative error value.
+ */
+static int
+ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow)
+{
+ struct ice_flow_seg_info *seg, *tun_seg;
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ int ret;
+
+	/* if there is already a filter rule for this kind, return -EINVAL */
+ if (hw->fdir_prof && hw->fdir_prof[flow] &&
+ hw->fdir_prof[flow]->fdir_seg[0])
+ return -EINVAL;
+
+ seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+ if (!seg)
+ return -ENOMEM;
+
+ tun_seg = devm_kcalloc(dev, ICE_FD_HW_SEG_MAX, sizeof(*tun_seg),
+ GFP_KERNEL);
+ if (!tun_seg) {
+ devm_kfree(dev, seg);
+ return -ENOMEM;
+ }
+
+ if (flow == ICE_FLTR_PTYPE_NONF_IPV4_TCP)
+ ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV4,
+ ICE_FLOW_SEG_HDR_TCP);
+ else if (flow == ICE_FLTR_PTYPE_NONF_IPV4_UDP)
+ ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV4,
+ ICE_FLOW_SEG_HDR_UDP);
+ else if (flow == ICE_FLTR_PTYPE_NONF_IPV6_TCP)
+ ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV6,
+ ICE_FLOW_SEG_HDR_TCP);
+ else if (flow == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+ ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV6,
+ ICE_FLOW_SEG_HDR_UDP);
+ else
+ ret = -EINVAL;
+ if (ret)
+ goto err_exit;
+
+ /* add filter for outer headers */
+ ret = ice_fdir_set_hw_fltr_rule(pf, seg, flow, ICE_FD_HW_SEG_NON_TUN);
+ if (ret)
+ /* could not write filter, free memory */
+ goto err_exit;
+
+ /* make tunneled filter HW entries if possible */
+ memcpy(&tun_seg[1], seg, sizeof(*seg));
+ ret = ice_fdir_set_hw_fltr_rule(pf, tun_seg, flow, ICE_FD_HW_SEG_TUN);
+ if (ret)
+ /* could not write tunnel filter, but outer header filter
+ * exists
+ */
+ devm_kfree(dev, tun_seg);
+
+ set_bit(flow, hw->fdir_perfect_fltr);
+ return ret;
+err_exit:
+ devm_kfree(dev, tun_seg);
+ devm_kfree(dev, seg);
+
+ return -EOPNOTSUPP;
+}
+
+/**
+ * ice_set_fdir_ip4_seg - set an IPv4 flow segment from ethtool mask data
+ * @seg: flow segment for programming
+ * @tcp_ip4_spec: mask data from ethtool
+ * @l4_proto: Layer 4 protocol to program
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ * false if not
+ *
+ * Set the mask data into the flow segment to be used to program HW
+ * table based on provided L4 protocol for IPv4
+ */
+static int
+ice_set_fdir_ip4_seg(struct ice_flow_seg_info *seg,
+ struct ethtool_tcpip4_spec *tcp_ip4_spec,
+ enum ice_flow_seg_hdr l4_proto, bool *perfect_fltr)
+{
+ enum ice_flow_field src_port, dst_port;
+
+ /* make sure we don't have any empty rule */
+ if (!tcp_ip4_spec->psrc && !tcp_ip4_spec->ip4src &&
+ !tcp_ip4_spec->pdst && !tcp_ip4_spec->ip4dst)
+ return -EINVAL;
+
+ /* filtering on TOS not supported */
+ if (tcp_ip4_spec->tos)
+ return -EOPNOTSUPP;
+
+ if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+ src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+ dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+ } else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+ src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+ dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+ } else if (l4_proto == ICE_FLOW_SEG_HDR_SCTP) {
+ src_port = ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT;
+ dst_port = ICE_FLOW_FIELD_IDX_SCTP_DST_PORT;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ *perfect_fltr = true;
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 | l4_proto);
+
+ /* IP source address */
+ if (tcp_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+ ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, false);
+ else if (!tcp_ip4_spec->ip4src)
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ /* IP destination address */
+ if (tcp_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+ ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, false);
+ else if (!tcp_ip4_spec->ip4dst)
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ /* Layer 4 source port */
+ if (tcp_ip4_spec->psrc == htons(0xFFFF))
+ ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ false);
+ else if (!tcp_ip4_spec->psrc)
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ /* Layer 4 destination port */
+ if (tcp_ip4_spec->pdst == htons(0xFFFF))
+ ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ false);
+ else if (!tcp_ip4_spec->pdst)
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+/**
+ * ice_set_fdir_ip4_usr_seg - set an IPv4 user-flow segment from ethtool data
+ * @seg: flow segment for programming
+ * @usr_ip4_spec: ethtool userdef packet offset
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ * false if not
+ *
+ * Set the offset data into the flow segment to be used to program HW
+ * table for IPv4
+ */
+static int
+ice_set_fdir_ip4_usr_seg(struct ice_flow_seg_info *seg,
+ struct ethtool_usrip4_spec *usr_ip4_spec,
+ bool *perfect_fltr)
+{
+ /* first 4 bytes of Layer 4 header */
+ if (usr_ip4_spec->l4_4_bytes)
+ return -EINVAL;
+ if (usr_ip4_spec->tos)
+ return -EINVAL;
+ if (usr_ip4_spec->ip_ver)
+ return -EINVAL;
+ /* Filtering on Layer 4 protocol not supported */
+ if (usr_ip4_spec->proto)
+ return -EOPNOTSUPP;
+ /* empty rules are not valid */
+ if (!usr_ip4_spec->ip4src && !usr_ip4_spec->ip4dst)
+ return -EINVAL;
+
+ *perfect_fltr = true;
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4);
+
+ /* IP source address */
+ if (usr_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+ ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, false);
+ else if (!usr_ip4_spec->ip4src)
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ /* IP destination address */
+ if (usr_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+ ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, false);
+ else if (!usr_ip4_spec->ip4dst)
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+/**
+ * ice_set_fdir_ip6_seg - set an IPv6 flow segment from ethtool mask data
+ * @seg: flow segment for programming
+ * @tcp_ip6_spec: mask data from ethtool
+ * @l4_proto: Layer 4 protocol to program
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ * false if not
+ *
+ * Set the mask data into the flow segment to be used to program HW
+ * table based on provided L4 protocol for IPv6
+ */
+static int
+ice_set_fdir_ip6_seg(struct ice_flow_seg_info *seg,
+ struct ethtool_tcpip6_spec *tcp_ip6_spec,
+ enum ice_flow_seg_hdr l4_proto, bool *perfect_fltr)
+{
+ enum ice_flow_field src_port, dst_port;
+
+ /* make sure we don't have any empty rule */
+ if (!memcmp(tcp_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+ sizeof(struct in6_addr)) &&
+ !memcmp(tcp_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+ sizeof(struct in6_addr)) &&
+ !tcp_ip6_spec->psrc && !tcp_ip6_spec->pdst)
+ return -EINVAL;
+
+ /* filtering on TC not supported */
+ if (tcp_ip6_spec->tclass)
+ return -EOPNOTSUPP;
+
+ if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+ src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+ dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+ } else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+ src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+ dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+ } else if (l4_proto == ICE_FLOW_SEG_HDR_SCTP) {
+ src_port = ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT;
+ dst_port = ICE_FLOW_FIELD_IDX_SCTP_DST_PORT;
+ } else {
+ return -EINVAL;
+ }
+
+ *perfect_fltr = true;
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6 | l4_proto);
+
+ if (!memcmp(tcp_ip6_spec->ip6src, &full_ipv6_addr_mask,
+ sizeof(struct in6_addr)))
+ ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_SA,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, false);
+ else if (!memcmp(tcp_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+ sizeof(struct in6_addr)))
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ if (!memcmp(tcp_ip6_spec->ip6dst, &full_ipv6_addr_mask,
+ sizeof(struct in6_addr)))
+ ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_DA,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, false);
+ else if (!memcmp(tcp_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+ sizeof(struct in6_addr)))
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ /* Layer 4 source port */
+ if (tcp_ip6_spec->psrc == htons(0xFFFF))
+ ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ false);
+ else if (!tcp_ip6_spec->psrc)
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ /* Layer 4 destination port */
+ if (tcp_ip6_spec->pdst == htons(0xFFFF))
+ ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ false);
+ else if (!tcp_ip6_spec->pdst)
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+/**
+ * ice_set_fdir_ip6_usr_seg - set an IPv6 user-flow segment from ethtool data
+ * @seg: flow segment for programming
+ * @usr_ip6_spec: ethtool userdef packet offset
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ * false if not
+ *
+ * Set the offset data into the flow segment to be used to program HW
+ * table for IPv6
+ */
+static int
+ice_set_fdir_ip6_usr_seg(struct ice_flow_seg_info *seg,
+ struct ethtool_usrip6_spec *usr_ip6_spec,
+ bool *perfect_fltr)
+{
+ /* filtering on Layer 4 bytes not supported */
+ if (usr_ip6_spec->l4_4_bytes)
+ return -EOPNOTSUPP;
+ /* filtering on TC not supported */
+ if (usr_ip6_spec->tclass)
+ return -EOPNOTSUPP;
+ /* filtering on Layer 4 protocol not supported */
+ if (usr_ip6_spec->l4_proto)
+ return -EOPNOTSUPP;
+ /* empty rules are not valid */
+ if (!memcmp(usr_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+ sizeof(struct in6_addr)) &&
+ !memcmp(usr_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+ sizeof(struct in6_addr)))
+ return -EINVAL;
+
+ *perfect_fltr = true;
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6);
+
+ if (!memcmp(usr_ip6_spec->ip6src, &full_ipv6_addr_mask,
+ sizeof(struct in6_addr)))
+ ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_SA,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, false);
+ else if (!memcmp(usr_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+ sizeof(struct in6_addr)))
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ if (!memcmp(usr_ip6_spec->ip6dst, &full_ipv6_addr_mask,
+ sizeof(struct in6_addr)))
+ ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_DA,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, false);
+ else if (!memcmp(usr_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+ sizeof(struct in6_addr)))
+ *perfect_fltr = false;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+/**
+ * ice_cfg_fdir_xtrct_seq - Configure extraction sequence for the given filter
+ * @pf: PF structure
+ * @fsp: pointer to ethtool Rx flow specification
+ * @user: user defined data from flow specification
+ *
+ * Returns 0 on success.
+ */
+static int
+ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
+ struct ice_rx_flow_userdef *user)
+{
+ struct ice_flow_seg_info *seg, *tun_seg;
+ struct device *dev = ice_pf_to_dev(pf);
+ enum ice_fltr_ptype fltr_idx;
+ struct ice_hw *hw = &pf->hw;
+ bool perfect_filter;
+ int ret;
+
+ seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+ if (!seg)
+ return -ENOMEM;
+
+ tun_seg = devm_kcalloc(dev, ICE_FD_HW_SEG_MAX, sizeof(*tun_seg),
+ GFP_KERNEL);
+ if (!tun_seg) {
+ devm_kfree(dev, seg);
+ return -ENOMEM;
+ }
+
+ switch (fsp->flow_type & ~FLOW_EXT) {
+ case TCP_V4_FLOW:
+ ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+ ICE_FLOW_SEG_HDR_TCP,
+ &perfect_filter);
+ break;
+ case UDP_V4_FLOW:
+ ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+ ICE_FLOW_SEG_HDR_UDP,
+ &perfect_filter);
+ break;
+ case SCTP_V4_FLOW:
+ ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+ ICE_FLOW_SEG_HDR_SCTP,
+ &perfect_filter);
+ break;
+ case IPV4_USER_FLOW:
+ ret = ice_set_fdir_ip4_usr_seg(seg, &fsp->m_u.usr_ip4_spec,
+ &perfect_filter);
+ break;
+ case TCP_V6_FLOW:
+ ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+ ICE_FLOW_SEG_HDR_TCP,
+ &perfect_filter);
+ break;
+ case UDP_V6_FLOW:
+ ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+ ICE_FLOW_SEG_HDR_UDP,
+ &perfect_filter);
+ break;
+ case SCTP_V6_FLOW:
+ ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+ ICE_FLOW_SEG_HDR_SCTP,
+ &perfect_filter);
+ break;
+ case IPV6_USER_FLOW:
+ ret = ice_set_fdir_ip6_usr_seg(seg, &fsp->m_u.usr_ip6_spec,
+ &perfect_filter);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ if (ret)
+ goto err_exit;
+
+	/* tunnel segments are shifted up one: the configured match goes
+	 * into the inner (second) segment
+	 */
+ memcpy(&tun_seg[1], seg, sizeof(*seg));
+
+ if (user && user->flex_fltr) {
+ perfect_filter = false;
+ ice_flow_add_fld_raw(seg, user->flex_offset,
+ ICE_FLTR_PRGM_FLEX_WORD_SIZE,
+ ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL);
+ ice_flow_add_fld_raw(&tun_seg[1], user->flex_offset,
+ ICE_FLTR_PRGM_FLEX_WORD_SIZE,
+ ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL);
+ }
+
+ fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT);
+
+ assign_bit(fltr_idx, hw->fdir_perfect_fltr, perfect_filter);
+
+ /* add filter for outer headers */
+ ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx,
+ ICE_FD_HW_SEG_NON_TUN);
+ if (ret == -EEXIST) {
+ /* Rule already exists, free memory and count as success */
+ ret = 0;
+ goto err_exit;
+ } else if (ret) {
+ /* could not write filter, free memory */
+ goto err_exit;
+ }
+
+ /* make tunneled filter HW entries if possible */
+ memcpy(&tun_seg[1], seg, sizeof(*seg));
+ ret = ice_fdir_set_hw_fltr_rule(pf, tun_seg, fltr_idx,
+ ICE_FD_HW_SEG_TUN);
+ if (ret == -EEXIST) {
+ /* Rule already exists, free memory and count as success */
+ devm_kfree(dev, tun_seg);
+ ret = 0;
+ } else if (ret) {
+ /* could not write tunnel filter, but outer filter exists */
+ devm_kfree(dev, tun_seg);
+ }
+
+ return ret;
+
+err_exit:
+ devm_kfree(dev, tun_seg);
+ devm_kfree(dev, seg);
+
+ return ret;
+}
+
+/**
+ * ice_update_per_q_fltr - update the per-queue sideband filter count
+ * @vsi: ptr to VSI
+ * @q_index: queue index
+ * @inc: true to increment or false to decrement the per-queue filter count
+ *
+ * This function is used to keep track of per-queue sideband filters
+ */
+static void ice_update_per_q_fltr(struct ice_vsi *vsi, u32 q_index, bool inc)
+{
+ struct ice_rx_ring *rx_ring;
+
+ if (!vsi->num_rxq || q_index >= vsi->num_rxq)
+ return;
+
+ rx_ring = vsi->rx_rings[q_index];
+ if (!rx_ring || !rx_ring->ch)
+ return;
+
+ if (inc)
+ atomic_inc(&rx_ring->ch->num_sb_fltr);
+ else
+ atomic_dec_if_positive(&rx_ring->ch->num_sb_fltr);
+}
+
+/**
+ * ice_fdir_write_fltr - send a flow director filter to the hardware
+ * @pf: PF data structure
+ * @input: filter structure
+ * @add: true adds the filter and false removes it
+ * @is_tun: true programs the tunneled (inner) headers, false the outer headers
+ *
+ * returns 0 on success and negative value on error
+ */
+int
+ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
+ bool is_tun)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ struct ice_fltr_desc desc;
+ struct ice_vsi *ctrl_vsi;
+ u8 *pkt, *frag_pkt;
+ bool has_frag;
+ int err;
+
+ ctrl_vsi = ice_get_ctrl_vsi(pf);
+ if (!ctrl_vsi)
+ return -EINVAL;
+
+ pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+ if (!pkt)
+ return -ENOMEM;
+ frag_pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+ if (!frag_pkt) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ ice_fdir_get_prgm_desc(hw, input, &desc, add);
+ err = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun);
+ if (err)
+ goto err_free_all;
+ err = ice_prgm_fdir_fltr(ctrl_vsi, &desc, pkt);
+ if (err)
+ goto err_free_all;
+
+ /* repeat for fragment packet */
+ has_frag = ice_fdir_has_frag(input->flow_type);
+ if (has_frag) {
+ /* does not return error */
+ ice_fdir_get_prgm_desc(hw, input, &desc, add);
+ err = ice_fdir_get_gen_prgm_pkt(hw, input, frag_pkt, true,
+ is_tun);
+ if (err)
+ goto err_frag;
+ err = ice_prgm_fdir_fltr(ctrl_vsi, &desc, frag_pkt);
+ if (err)
+ goto err_frag;
+ } else {
+ devm_kfree(dev, frag_pkt);
+ }
+
+ return 0;
+
+err_free_all:
+ devm_kfree(dev, frag_pkt);
+err_free:
+ devm_kfree(dev, pkt);
+ return err;
+
+err_frag:
+ devm_kfree(dev, frag_pkt);
+ return err;
+}
+
+/**
+ * ice_fdir_write_all_fltr - write a flow director filter for all segments
+ * @pf: PF data structure
+ * @input: filter structure
+ * @add: true adds the filter and false removes it
+ *
+ * Writes the filter for the non-tunneled segment and, if a tunnel port is
+ * open, for the tunneled segment as well. Returns 0 on success and a
+ * negative value on error.
+ */
+static int
+ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input,
+ bool add)
+{
+ u16 port_num;
+ int tun;
+
+ for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+ bool is_tun = tun == ICE_FD_HW_SEG_TUN;
+ int err;
+
+ if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num, TNL_ALL))
+ continue;
+ err = ice_fdir_write_fltr(pf, input, add, is_tun);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+/**
+ * ice_fdir_replay_fltrs - replay filters from the HW filter list
+ * @pf: board private structure
+ */
+void ice_fdir_replay_fltrs(struct ice_pf *pf)
+{
+ struct ice_fdir_fltr *f_rule;
+ struct ice_hw *hw = &pf->hw;
+
+ list_for_each_entry(f_rule, &hw->fdir_list_head, fltr_node) {
+ int err = ice_fdir_write_all_fltr(pf, f_rule, true);
+
+ if (err)
+ dev_dbg(ice_pf_to_dev(pf), "Flow Director error %d, could not reprogram filter %d\n",
+ err, f_rule->fltr_id);
+ }
+}
+
+/**
+ * ice_fdir_create_dflt_rules - create default perfect filters
+ * @pf: PF data structure
+ *
+ * Returns 0 on success or a negative error value.
+ */
+int ice_fdir_create_dflt_rules(struct ice_pf *pf)
+{
+ int err;
+
+ /* Create perfect TCP and UDP rules in hardware. */
+ err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_TCP);
+ if (err)
+ return err;
+
+ err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_UDP);
+ if (err)
+ return err;
+
+ err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_TCP);
+ if (err)
+ return err;
+
+ err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_UDP);
+
+ return err;
+}
+
+/**
+ * ice_fdir_del_all_fltrs - Delete all flow director filters
+ * @vsi: the VSI being changed
+ *
+ * This function needs to be called while holding hw->fdir_fltr_lock
+ */
+void ice_fdir_del_all_fltrs(struct ice_vsi *vsi)
+{
+ struct ice_fdir_fltr *f_rule, *tmp;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+
+ list_for_each_entry_safe(f_rule, tmp, &hw->fdir_list_head, fltr_node) {
+ ice_fdir_write_all_fltr(pf, f_rule, false);
+ ice_fdir_update_cntrs(hw, f_rule->flow_type, false);
+ list_del(&f_rule->fltr_node);
+ devm_kfree(ice_pf_to_dev(pf), f_rule);
+ }
+}
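+
+/* Usage sketch (mirrors ice_vsi_manage_fdir() below): callers serialize
+ * on the filter lock around the bulk delete:
+ *
+ *	mutex_lock(&hw->fdir_fltr_lock);
+ *	ice_fdir_del_all_fltrs(vsi);
+ *	mutex_unlock(&hw->fdir_fltr_lock);
+ */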
+
+/**
+ * ice_vsi_manage_fdir - turn on/off flow director
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ */
+void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_fltr_ptype flow;
+
+ if (ena) {
+ set_bit(ICE_FLAG_FD_ENA, pf->flags);
+ ice_fdir_create_dflt_rules(pf);
+ return;
+ }
+
+ mutex_lock(&hw->fdir_fltr_lock);
+ if (!test_and_clear_bit(ICE_FLAG_FD_ENA, pf->flags))
+ goto release_lock;
+
+ ice_fdir_del_all_fltrs(vsi);
+
+ if (hw->fdir_prof)
+ for (flow = ICE_FLTR_PTYPE_NONF_NONE; flow < ICE_FLTR_PTYPE_MAX;
+ flow++)
+ if (hw->fdir_prof[flow])
+ ice_fdir_rem_flow(hw, ICE_BLK_FD, flow);
+
+release_lock:
+ mutex_unlock(&hw->fdir_fltr_lock);
+}
+
+/**
+ * ice_fdir_do_rem_flow - delete flow and possibly add perfect flow
+ * @pf: PF structure
+ * @flow_type: FDir flow type to release
+ */
+static void
+ice_fdir_do_rem_flow(struct ice_pf *pf, enum ice_fltr_ptype flow_type)
+{
+ struct ice_hw *hw = &pf->hw;
+ bool need_perfect = false;
+
+ if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+ need_perfect = true;
+
+ if (need_perfect && test_bit(flow_type, hw->fdir_perfect_fltr))
+ return;
+
+ ice_fdir_rem_flow(hw, ICE_BLK_FD, flow_type);
+ if (need_perfect)
+ ice_create_init_fdir_rule(pf, flow_type);
+}
+
+/**
+ * ice_fdir_update_list_entry - add or delete a filter from the filter list
+ * @pf: PF structure
+ * @input: filter structure
+ * @fltr_idx: ethtool index of filter to modify
+ *
+ * returns 0 on success and negative on errors
+ */
+static int
+ice_fdir_update_list_entry(struct ice_pf *pf, struct ice_fdir_fltr *input,
+ int fltr_idx)
+{
+ struct ice_fdir_fltr *old_fltr;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_vsi *vsi;
+ int err = -ENOENT;
+
+ /* Do not update filters during reset */
+ if (ice_is_reset_in_progress(pf->state))
+ return -EBUSY;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return -EINVAL;
+
+ old_fltr = ice_fdir_find_fltr_by_idx(hw, fltr_idx);
+ if (old_fltr) {
+ err = ice_fdir_write_all_fltr(pf, old_fltr, false);
+ if (err)
+ return err;
+ ice_fdir_update_cntrs(hw, old_fltr->flow_type, false);
+ /* update sb-filters count, specific to ring->channel */
+ ice_update_per_q_fltr(vsi, old_fltr->orig_q_index, false);
+ if (!input && !hw->fdir_fltr_cnt[old_fltr->flow_type])
+ /* we just deleted the last filter of flow_type so we
+ * should also delete the HW filter info.
+ */
+ ice_fdir_do_rem_flow(pf, old_fltr->flow_type);
+ list_del(&old_fltr->fltr_node);
+ devm_kfree(ice_hw_to_dev(hw), old_fltr);
+ }
+ if (!input)
+ return err;
+ ice_fdir_list_add_fltr(hw, input);
+ /* update sb-filters count, specific to ring->channel */
+ ice_update_per_q_fltr(vsi, input->orig_q_index, true);
+ ice_fdir_update_cntrs(hw, input->flow_type, true);
+ return 0;
+}
+
+/**
+ * ice_del_fdir_ethtool - delete Flow Director filter
+ * @vsi: pointer to target VSI
+ * @cmd: command to add or delete Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp =
+ (struct ethtool_rx_flow_spec *)&cmd->fs;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int val;
+
+ if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+ return -EOPNOTSUPP;
+
+ /* Do not delete filters during reset */
+ if (ice_is_reset_in_progress(pf->state)) {
+ dev_err(ice_pf_to_dev(pf), "Device is resetting - deleting Flow Director filters not supported during reset\n");
+ return -EBUSY;
+ }
+
+ if (test_bit(ICE_FD_FLUSH_REQ, pf->state))
+ return -EBUSY;
+
+ mutex_lock(&hw->fdir_fltr_lock);
+ val = ice_fdir_update_list_entry(pf, NULL, fsp->location);
+ mutex_unlock(&hw->fdir_fltr_lock);
+
+ return val;
+}
+
+/**
+ * ice_update_ring_dest_vsi - update dest ring and dest VSI
+ * @vsi: pointer to target VSI
+ * @dest_vsi: ptr to dest VSI index
+ * @ring: ptr to dest ring
+ *
+ * This function updates the destination VSI and queue if the user specifies
+ * a target queue that falls within a channel's (aka ADQ) queue region
+ */
+static void
+ice_update_ring_dest_vsi(struct ice_vsi *vsi, u16 *dest_vsi, u32 *ring)
+{
+ struct ice_channel *ch;
+
+ list_for_each_entry(ch, &vsi->ch_list, list) {
+ if (!ch->ch_vsi)
+ continue;
+
+ /* make sure to locate corresponding channel based on "queue"
+ * specified
+ */
+ if ((*ring < ch->base_q) ||
+ (*ring >= (ch->base_q + ch->num_rxq)))
+ continue;
+
+ /* update the dest_vsi based on channel */
+ *dest_vsi = ch->ch_vsi->idx;
+
+ /* update the "ring" to be correct based on channel */
+ *ring -= ch->base_q;
+ }
+}
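+
+/* Worked example (illustrative values): with a channel whose base_q is 8
+ * and num_rxq is 4, a requested *ring of 10 falls inside that channel's
+ * queue region, so *dest_vsi becomes the channel VSI index and *ring
+ * becomes 10 - 8 = 2.
+ */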
+
+/**
+ * ice_set_fdir_input_set - Set the input set for Flow Director
+ * @vsi: pointer to target VSI
+ * @fsp: pointer to ethtool Rx flow specification
+ * @input: filter structure
+ */
+static int
+ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp,
+ struct ice_fdir_fltr *input)
+{
+ u16 dest_vsi, q_index = 0;
+ u16 orig_q_index = 0;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ int flow_type;
+ u8 dest_ctl;
+
+ if (!vsi || !fsp || !input)
+ return -EINVAL;
+
+ pf = vsi->back;
+ hw = &pf->hw;
+
+ dest_vsi = vsi->idx;
+ if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+ dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DROP_PKT;
+ } else {
+ u32 ring = ethtool_get_flow_spec_ring(fsp->ring_cookie);
+ u8 vf = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie);
+
+ if (vf) {
+ dev_err(ice_pf_to_dev(pf), "Failed to add filter. Flow director filters are not supported on VF queues.\n");
+ return -EINVAL;
+ }
+
+ if (ring >= vsi->num_rxq)
+ return -EINVAL;
+
+ orig_q_index = ring;
+ ice_update_ring_dest_vsi(vsi, &dest_vsi, &ring);
+ dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+ q_index = ring;
+ }
+
+ input->fltr_id = fsp->location;
+ input->q_index = q_index;
+ flow_type = fsp->flow_type & ~FLOW_EXT;
+
+	/* Record the original queue index as specified by the user;
+	 * with a channel (ADQ) configuration, 'q_index' becomes
+	 * relative to the channel's queue region.
+ */
+ input->orig_q_index = orig_q_index;
+ input->dest_vsi = dest_vsi;
+ input->dest_ctl = dest_ctl;
+ input->fltr_status = ICE_FLTR_PRGM_DESC_FD_STATUS_FD_ID;
+ input->cnt_index = ICE_FD_SB_STAT_IDX(hw->fd_ctr_base);
+ input->flow_type = ice_ethtool_flow_to_fltr(flow_type);
+
+ if (fsp->flow_type & FLOW_EXT) {
+ memcpy(input->ext_data.usr_def, fsp->h_ext.data,
+ sizeof(input->ext_data.usr_def));
+ input->ext_data.vlan_type = fsp->h_ext.vlan_etype;
+ input->ext_data.vlan_tag = fsp->h_ext.vlan_tci;
+ memcpy(input->ext_mask.usr_def, fsp->m_ext.data,
+ sizeof(input->ext_mask.usr_def));
+ input->ext_mask.vlan_type = fsp->m_ext.vlan_etype;
+ input->ext_mask.vlan_tag = fsp->m_ext.vlan_tci;
+ }
+
+ switch (flow_type) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ input->ip.v4.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+ input->ip.v4.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+ input->ip.v4.dst_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+ input->ip.v4.src_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+ input->mask.v4.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+ input->mask.v4.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+ input->mask.v4.dst_ip = fsp->m_u.tcp_ip4_spec.ip4dst;
+ input->mask.v4.src_ip = fsp->m_u.tcp_ip4_spec.ip4src;
+ break;
+ case IPV4_USER_FLOW:
+ input->ip.v4.dst_ip = fsp->h_u.usr_ip4_spec.ip4dst;
+ input->ip.v4.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
+ input->ip.v4.l4_header = fsp->h_u.usr_ip4_spec.l4_4_bytes;
+ input->ip.v4.proto = fsp->h_u.usr_ip4_spec.proto;
+ input->ip.v4.ip_ver = fsp->h_u.usr_ip4_spec.ip_ver;
+ input->ip.v4.tos = fsp->h_u.usr_ip4_spec.tos;
+ input->mask.v4.dst_ip = fsp->m_u.usr_ip4_spec.ip4dst;
+ input->mask.v4.src_ip = fsp->m_u.usr_ip4_spec.ip4src;
+ input->mask.v4.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
+ input->mask.v4.proto = fsp->m_u.usr_ip4_spec.proto;
+ input->mask.v4.ip_ver = fsp->m_u.usr_ip4_spec.ip_ver;
+ input->mask.v4.tos = fsp->m_u.usr_ip4_spec.tos;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ memcpy(input->ip.v6.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ memcpy(input->ip.v6.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ input->ip.v6.dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+ input->ip.v6.src_port = fsp->h_u.tcp_ip6_spec.psrc;
+ input->ip.v6.tc = fsp->h_u.tcp_ip6_spec.tclass;
+ memcpy(input->mask.v6.dst_ip, fsp->m_u.tcp_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ memcpy(input->mask.v6.src_ip, fsp->m_u.tcp_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ input->mask.v6.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
+ input->mask.v6.src_port = fsp->m_u.tcp_ip6_spec.psrc;
+ input->mask.v6.tc = fsp->m_u.tcp_ip6_spec.tclass;
+ break;
+ case IPV6_USER_FLOW:
+ memcpy(input->ip.v6.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ memcpy(input->ip.v6.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ input->ip.v6.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes;
+ input->ip.v6.tc = fsp->h_u.usr_ip6_spec.tclass;
+
+ /* if no protocol requested, use IPPROTO_NONE */
+ if (!fsp->m_u.usr_ip6_spec.l4_proto)
+ input->ip.v6.proto = IPPROTO_NONE;
+ else
+ input->ip.v6.proto = fsp->h_u.usr_ip6_spec.l4_proto;
+
+ memcpy(input->mask.v6.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ memcpy(input->mask.v6.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ input->mask.v6.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
+ input->mask.v6.tc = fsp->m_u.usr_ip6_spec.tclass;
+ input->mask.v6.proto = fsp->m_u.usr_ip6_spec.l4_proto;
+ break;
+ default:
+		/* unsupported flow types are rejected */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_add_fdir_ethtool - Add/Remove Flow Director filter
+ * @vsi: pointer to target VSI
+ * @cmd: command to add or delete Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
+{
+ struct ice_rx_flow_userdef userdata;
+ struct ethtool_rx_flow_spec *fsp;
+ struct ice_fdir_fltr *input;
+ struct device *dev;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ int fltrs_needed;
+ u16 tunnel_port;
+ int ret;
+
+ if (!vsi)
+ return -EINVAL;
+
+ pf = vsi->back;
+ hw = &pf->hw;
+ dev = ice_pf_to_dev(pf);
+
+ if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+ return -EOPNOTSUPP;
+
+ /* Do not program filters during reset */
+ if (ice_is_reset_in_progress(pf->state)) {
+ dev_err(dev, "Device is resetting - adding Flow Director filters not supported during reset\n");
+ return -EBUSY;
+ }
+
+ fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+ if (ice_parse_rx_flow_user_data(fsp, &userdata))
+ return -EINVAL;
+
+ if (fsp->flow_type & FLOW_MAC_EXT)
+ return -EINVAL;
+
+ ret = ice_cfg_fdir_xtrct_seq(pf, fsp, &userdata);
+ if (ret)
+ return ret;
+
+ if (fsp->location >= ice_get_fdir_cnt_all(hw)) {
+ dev_err(dev, "Failed to add filter. The maximum number of flow director filters has been reached.\n");
+ return -ENOSPC;
+ }
+
+ /* return error if not an update and no available filters */
+ fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port, TNL_ALL) ? 2 : 1;
+ if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) &&
+ ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) {
+ dev_err(dev, "Failed to add filter. The maximum number of flow director filters has been reached.\n");
+ return -ENOSPC;
+ }
+
+ input = devm_kzalloc(dev, sizeof(*input), GFP_KERNEL);
+ if (!input)
+ return -ENOMEM;
+
+ ret = ice_set_fdir_input_set(vsi, fsp, input);
+ if (ret)
+ goto free_input;
+
+ mutex_lock(&hw->fdir_fltr_lock);
+ if (ice_fdir_is_dup_fltr(hw, input)) {
+ ret = -EINVAL;
+ goto release_lock;
+ }
+
+ if (userdata.flex_fltr) {
+ input->flex_fltr = true;
+ input->flex_word = cpu_to_be16(userdata.flex_word);
+ input->flex_offset = userdata.flex_offset;
+ }
+
+ input->cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+ input->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_THREE;
+ input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+
+ /* input struct is added to the HW filter list */
+ ret = ice_fdir_update_list_entry(pf, input, fsp->location);
+ if (ret)
+ goto release_lock;
+
+ ret = ice_fdir_write_all_fltr(pf, input, true);
+ if (ret)
+ goto remove_sw_rule;
+
+ goto release_lock;
+
+remove_sw_rule:
+ ice_fdir_update_cntrs(hw, input->flow_type, false);
+ /* update sb-filters count, specific to ring->channel */
+ ice_update_per_q_fltr(vsi, input->orig_q_index, false);
+ list_del(&input->fltr_node);
+release_lock:
+ mutex_unlock(&hw->fdir_fltr_lock);
+free_input:
+ if (ret)
+ devm_kfree(dev, input);
+
+ return ret;
+}
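+
+/* Example (illustrative, not part of the driver): a rule added with
+ *
+ *	ethtool -N eth0 flow-type tcp4 src-ip 192.168.1.10 dst-port 80 \
+ *		action 3 loc 5
+ *
+ * arrives here as an ethtool_rx_flow_spec with location 5 and
+ * ring_cookie 3, and can later be removed via ice_del_fdir_ethtool()
+ * with "ethtool -N eth0 delete 5".
+ */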
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
new file mode 100644
index 000000000..ae089d32e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -0,0 +1,1302 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice_common.h"
+
+/* These are training packet headers used to program flow director filters. */
+static const u8 ice_fdir_tcpv4_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x28, 0x00, 0x01, 0x00, 0x00, 0x40, 0x06,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00
+};
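+
+/* Layout note (derived from the bytes above): ice_fdir_tcpv4_pkt is a
+ * minimal 54-byte frame: a 14-byte Ethernet header (ethertype 0x0800),
+ * a 20-byte IPv4 header (version/IHL 0x45, protocol 0x06 = TCP) and a
+ * 20-byte TCP header (data offset 0x50 = 5 words). Addresses and ports
+ * are zeroed here and are filled in when a filter is programmed.
+ */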
+
+static const u8 ice_fdir_udpv4_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x1C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctpv4_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x20, 0x00, 0x00, 0x40, 0x00, 0x40, 0x84,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x10,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00
+};
+
+static const u8 ice_fdir_udp4_gtpu4_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x4c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+ 0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+ 0x00, 0x1c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcp4_gtpu4_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x58, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+ 0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+ 0x00, 0x28, 0x00, 0x00, 0x40, 0x00, 0x40, 0x06,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_icmp4_gtpu4_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x4c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+ 0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+ 0x00, 0x1c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_gtpu4_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x44, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+ 0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+ 0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_l2tpv3_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x73,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_l2tpv3_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x73, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_esp_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00
+};
+
+static const u8 ice_fdir_ipv6_esp_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x32, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_ah_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x33,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00
+};
+
+static const u8 ice_fdir_ipv6_ah_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x33, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_nat_t_esp_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x1C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x11, 0x94, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_nat_t_esp_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+ 0x00, 0x00, 0x00, 0x08, 0x11, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x11, 0x94, 0x00, 0x00, 0x00, 0x08,
+};
+
+static const u8 ice_fdir_ipv4_pfcp_node_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x2C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x22, 0x65, 0x22, 0x65, 0x00, 0x00,
+ 0x00, 0x00, 0x20, 0x00, 0x00, 0x10, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_pfcp_session_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x2C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x22, 0x65, 0x22, 0x65, 0x00, 0x00,
+ 0x00, 0x00, 0x21, 0x00, 0x00, 0x10, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pfcp_node_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+ 0x00, 0x00, 0x00, 0x18, 0x11, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x65,
+ 0x22, 0x65, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00,
+ 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pfcp_session_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+ 0x00, 0x00, 0x00, 0x18, 0x11, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x65,
+ 0x22, 0x65, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00,
+ 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_non_ip_l2_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcpv6_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+ 0x00, 0x00, 0x00, 0x14, 0x06, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x50, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+static const u8 ice_fdir_udpv6_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+ 0x00, 0x00, 0x00, 0x08, 0x11, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x08, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctpv6_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+ 0x00, 0x00, 0x00, 0x0C, 0x84, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x3B, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcp4_tun_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x5a, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+ 0x45, 0x00, 0x00, 0x28, 0x00, 0x00, 0x40, 0x00,
+ 0x40, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_udp4_tun_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x4e, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+ 0x45, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x40, 0x00,
+ 0x40, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctp4_tun_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x52, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+ 0x45, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00,
+ 0x40, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ip4_tun_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x46, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+ 0x45, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcp6_tun_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x6e, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+ 0x60, 0x00, 0x00, 0x00, 0x00, 0x14, 0x06, 0x40,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x20, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_udp6_tun_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x62, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+ 0x60, 0x00, 0x00, 0x00, 0x00, 0x08, 0x11, 0x40,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctp6_tun_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x66, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+ 0x60, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x84, 0x40,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ip6_tun_pkt[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+ 0x00, 0x5a, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+ 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b, 0x40,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/* Flow Director no-op training packet table */
+static const struct ice_fdir_base_pkt ice_fdir_pkt[] = {
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_TCP,
+ sizeof(ice_fdir_tcpv4_pkt), ice_fdir_tcpv4_pkt,
+ sizeof(ice_fdir_tcp4_tun_pkt), ice_fdir_tcp4_tun_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_UDP,
+ sizeof(ice_fdir_udpv4_pkt), ice_fdir_udpv4_pkt,
+ sizeof(ice_fdir_udp4_tun_pkt), ice_fdir_udp4_tun_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
+ sizeof(ice_fdir_sctpv4_pkt), ice_fdir_sctpv4_pkt,
+ sizeof(ice_fdir_sctp4_tun_pkt), ice_fdir_sctp4_tun_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+ sizeof(ice_fdir_ipv4_pkt), ice_fdir_ipv4_pkt,
+ sizeof(ice_fdir_ip4_tun_pkt), ice_fdir_ip4_tun_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP,
+ sizeof(ice_fdir_udp4_gtpu4_pkt),
+ ice_fdir_udp4_gtpu4_pkt,
+ sizeof(ice_fdir_udp4_gtpu4_pkt),
+ ice_fdir_udp4_gtpu4_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP,
+ sizeof(ice_fdir_tcp4_gtpu4_pkt),
+ ice_fdir_tcp4_gtpu4_pkt,
+ sizeof(ice_fdir_tcp4_gtpu4_pkt),
+ ice_fdir_tcp4_gtpu4_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP,
+ sizeof(ice_fdir_icmp4_gtpu4_pkt),
+ ice_fdir_icmp4_gtpu4_pkt,
+ sizeof(ice_fdir_icmp4_gtpu4_pkt),
+ ice_fdir_icmp4_gtpu4_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER,
+ sizeof(ice_fdir_ipv4_gtpu4_pkt),
+ ice_fdir_ipv4_gtpu4_pkt,
+ sizeof(ice_fdir_ipv4_gtpu4_pkt),
+ ice_fdir_ipv4_gtpu4_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3,
+ sizeof(ice_fdir_ipv4_l2tpv3_pkt), ice_fdir_ipv4_l2tpv3_pkt,
+ sizeof(ice_fdir_ipv4_l2tpv3_pkt), ice_fdir_ipv4_l2tpv3_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3,
+ sizeof(ice_fdir_ipv6_l2tpv3_pkt), ice_fdir_ipv6_l2tpv3_pkt,
+ sizeof(ice_fdir_ipv6_l2tpv3_pkt), ice_fdir_ipv6_l2tpv3_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_ESP,
+ sizeof(ice_fdir_ipv4_esp_pkt), ice_fdir_ipv4_esp_pkt,
+ sizeof(ice_fdir_ipv4_esp_pkt), ice_fdir_ipv4_esp_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV6_ESP,
+ sizeof(ice_fdir_ipv6_esp_pkt), ice_fdir_ipv6_esp_pkt,
+ sizeof(ice_fdir_ipv6_esp_pkt), ice_fdir_ipv6_esp_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_AH,
+ sizeof(ice_fdir_ipv4_ah_pkt), ice_fdir_ipv4_ah_pkt,
+ sizeof(ice_fdir_ipv4_ah_pkt), ice_fdir_ipv4_ah_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV6_AH,
+ sizeof(ice_fdir_ipv6_ah_pkt), ice_fdir_ipv6_ah_pkt,
+ sizeof(ice_fdir_ipv6_ah_pkt), ice_fdir_ipv6_ah_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP,
+ sizeof(ice_fdir_ipv4_nat_t_esp_pkt),
+ ice_fdir_ipv4_nat_t_esp_pkt,
+ sizeof(ice_fdir_ipv4_nat_t_esp_pkt),
+ ice_fdir_ipv4_nat_t_esp_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP,
+ sizeof(ice_fdir_ipv6_nat_t_esp_pkt),
+ ice_fdir_ipv6_nat_t_esp_pkt,
+ sizeof(ice_fdir_ipv6_nat_t_esp_pkt),
+ ice_fdir_ipv6_nat_t_esp_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE,
+ sizeof(ice_fdir_ipv4_pfcp_node_pkt),
+ ice_fdir_ipv4_pfcp_node_pkt,
+ sizeof(ice_fdir_ipv4_pfcp_node_pkt),
+ ice_fdir_ipv4_pfcp_node_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION,
+ sizeof(ice_fdir_ipv4_pfcp_session_pkt),
+ ice_fdir_ipv4_pfcp_session_pkt,
+ sizeof(ice_fdir_ipv4_pfcp_session_pkt),
+ ice_fdir_ipv4_pfcp_session_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE,
+ sizeof(ice_fdir_ipv6_pfcp_node_pkt),
+ ice_fdir_ipv6_pfcp_node_pkt,
+ sizeof(ice_fdir_ipv6_pfcp_node_pkt),
+ ice_fdir_ipv6_pfcp_node_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION,
+ sizeof(ice_fdir_ipv6_pfcp_session_pkt),
+ ice_fdir_ipv6_pfcp_session_pkt,
+ sizeof(ice_fdir_ipv6_pfcp_session_pkt),
+ ice_fdir_ipv6_pfcp_session_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NON_IP_L2,
+ sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt,
+ sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV6_TCP,
+ sizeof(ice_fdir_tcpv6_pkt), ice_fdir_tcpv6_pkt,
+ sizeof(ice_fdir_tcp6_tun_pkt), ice_fdir_tcp6_tun_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV6_UDP,
+ sizeof(ice_fdir_udpv6_pkt), ice_fdir_udpv6_pkt,
+ sizeof(ice_fdir_udp6_tun_pkt), ice_fdir_udp6_tun_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV6_SCTP,
+ sizeof(ice_fdir_sctpv6_pkt), ice_fdir_sctpv6_pkt,
+ sizeof(ice_fdir_sctp6_tun_pkt), ice_fdir_sctp6_tun_pkt,
+ },
+ {
+ ICE_FLTR_PTYPE_NONF_IPV6_OTHER,
+ sizeof(ice_fdir_ipv6_pkt), ice_fdir_ipv6_pkt,
+ sizeof(ice_fdir_ip6_tun_pkt), ice_fdir_ip6_tun_pkt,
+ },
+};
+
+#define ICE_FDIR_NUM_PKT ARRAY_SIZE(ice_fdir_pkt)
+
+/**
+ * ice_set_dflt_val_fd_desc - set default filter descriptor context values
+ * @fd_fltr_ctx: pointer to fd filter descriptor context
+ */
+static void ice_set_dflt_val_fd_desc(struct ice_fd_fltr_desc_ctx *fd_fltr_ctx)
+{
+ fd_fltr_ctx->comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO;
+ fd_fltr_ctx->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+ fd_fltr_ctx->fd_space = ICE_FXD_FLTR_QW0_FD_SPACE_GUAR_BEST;
+ fd_fltr_ctx->cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+ fd_fltr_ctx->evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_TRUE;
+ fd_fltr_ctx->toq = ICE_FXD_FLTR_QW0_TO_Q_EQUALS_QINDEX;
+ fd_fltr_ctx->toq_prio = ICE_FXD_FLTR_QW0_TO_Q_PRIO1;
+ fd_fltr_ctx->dpu_recipe = ICE_FXD_FLTR_QW0_DPU_RECIPE_DFLT;
+ fd_fltr_ctx->drop = ICE_FXD_FLTR_QW0_DROP_NO;
+ fd_fltr_ctx->flex_prio = ICE_FXD_FLTR_QW0_FLEX_PRI_NONE;
+ fd_fltr_ctx->flex_mdid = ICE_FXD_FLTR_QW0_FLEX_MDID0;
+ fd_fltr_ctx->flex_val = ICE_FXD_FLTR_QW0_FLEX_VAL0;
+ fd_fltr_ctx->dtype = ICE_TX_DESC_DTYPE_FLTR_PROG;
+ fd_fltr_ctx->desc_prof_prio = ICE_FXD_FLTR_QW1_PROF_PRIO_ZERO;
+ fd_fltr_ctx->desc_prof = ICE_FXD_FLTR_QW1_PROF_ZERO;
+ fd_fltr_ctx->swap = ICE_FXD_FLTR_QW1_SWAP_SET;
+ fd_fltr_ctx->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_ONE;
+ fd_fltr_ctx->fdid_mdid = ICE_FXD_FLTR_QW1_FDID_MDID_FD;
+ fd_fltr_ctx->fdid = ICE_FXD_FLTR_QW1_FDID_ZERO;
+}
+
+/**
+ * ice_set_fd_desc_val - pack descriptor context values into a filter descriptor
+ * @ctx: pointer to fd filter descriptor context
+ * @fdir_desc: populated with fd filter descriptor values
+ */
+static void
+ice_set_fd_desc_val(struct ice_fd_fltr_desc_ctx *ctx,
+ struct ice_fltr_desc *fdir_desc)
+{
+ u64 qword;
+
+ /* prep QW0 of FD filter programming desc */
+ qword = ((u64)ctx->qindex << ICE_FXD_FLTR_QW0_QINDEX_S) &
+ ICE_FXD_FLTR_QW0_QINDEX_M;
+ qword |= ((u64)ctx->comp_q << ICE_FXD_FLTR_QW0_COMP_Q_S) &
+ ICE_FXD_FLTR_QW0_COMP_Q_M;
+ qword |= ((u64)ctx->comp_report << ICE_FXD_FLTR_QW0_COMP_REPORT_S) &
+ ICE_FXD_FLTR_QW0_COMP_REPORT_M;
+ qword |= ((u64)ctx->fd_space << ICE_FXD_FLTR_QW0_FD_SPACE_S) &
+ ICE_FXD_FLTR_QW0_FD_SPACE_M;
+ qword |= ((u64)ctx->cnt_index << ICE_FXD_FLTR_QW0_STAT_CNT_S) &
+ ICE_FXD_FLTR_QW0_STAT_CNT_M;
+ qword |= ((u64)ctx->cnt_ena << ICE_FXD_FLTR_QW0_STAT_ENA_S) &
+ ICE_FXD_FLTR_QW0_STAT_ENA_M;
+ qword |= ((u64)ctx->evict_ena << ICE_FXD_FLTR_QW0_EVICT_ENA_S) &
+ ICE_FXD_FLTR_QW0_EVICT_ENA_M;
+ qword |= ((u64)ctx->toq << ICE_FXD_FLTR_QW0_TO_Q_S) &
+ ICE_FXD_FLTR_QW0_TO_Q_M;
+ qword |= ((u64)ctx->toq_prio << ICE_FXD_FLTR_QW0_TO_Q_PRI_S) &
+ ICE_FXD_FLTR_QW0_TO_Q_PRI_M;
+ qword |= ((u64)ctx->dpu_recipe << ICE_FXD_FLTR_QW0_DPU_RECIPE_S) &
+ ICE_FXD_FLTR_QW0_DPU_RECIPE_M;
+ qword |= ((u64)ctx->drop << ICE_FXD_FLTR_QW0_DROP_S) &
+ ICE_FXD_FLTR_QW0_DROP_M;
+ qword |= ((u64)ctx->flex_prio << ICE_FXD_FLTR_QW0_FLEX_PRI_S) &
+ ICE_FXD_FLTR_QW0_FLEX_PRI_M;
+ qword |= ((u64)ctx->flex_mdid << ICE_FXD_FLTR_QW0_FLEX_MDID_S) &
+ ICE_FXD_FLTR_QW0_FLEX_MDID_M;
+ qword |= ((u64)ctx->flex_val << ICE_FXD_FLTR_QW0_FLEX_VAL_S) &
+ ICE_FXD_FLTR_QW0_FLEX_VAL_M;
+ fdir_desc->qidx_compq_space_stat = cpu_to_le64(qword);
+
+ /* prep QW1 of FD filter programming desc */
+ qword = ((u64)ctx->dtype << ICE_FXD_FLTR_QW1_DTYPE_S) &
+ ICE_FXD_FLTR_QW1_DTYPE_M;
+ qword |= ((u64)ctx->pcmd << ICE_FXD_FLTR_QW1_PCMD_S) &
+ ICE_FXD_FLTR_QW1_PCMD_M;
+ qword |= ((u64)ctx->desc_prof_prio << ICE_FXD_FLTR_QW1_PROF_PRI_S) &
+ ICE_FXD_FLTR_QW1_PROF_PRI_M;
+ qword |= ((u64)ctx->desc_prof << ICE_FXD_FLTR_QW1_PROF_S) &
+ ICE_FXD_FLTR_QW1_PROF_M;
+ qword |= ((u64)ctx->fd_vsi << ICE_FXD_FLTR_QW1_FD_VSI_S) &
+ ICE_FXD_FLTR_QW1_FD_VSI_M;
+ qword |= ((u64)ctx->swap << ICE_FXD_FLTR_QW1_SWAP_S) &
+ ICE_FXD_FLTR_QW1_SWAP_M;
+ qword |= ((u64)ctx->fdid_prio << ICE_FXD_FLTR_QW1_FDID_PRI_S) &
+ ICE_FXD_FLTR_QW1_FDID_PRI_M;
+ qword |= ((u64)ctx->fdid_mdid << ICE_FXD_FLTR_QW1_FDID_MDID_S) &
+ ICE_FXD_FLTR_QW1_FDID_MDID_M;
+ qword |= ((u64)ctx->fdid << ICE_FXD_FLTR_QW1_FDID_S) &
+ ICE_FXD_FLTR_QW1_FDID_M;
+ fdir_desc->dtype_cmd_vsi_fdid = cpu_to_le64(qword);
+}
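+
+/* A worked example of the shift-and-mask pattern above, with illustrative
+ * values rather than the real register definitions: for a field with shift
+ * S = 11 and mask M = 0x7ULL << 11, a ctx value of 5 is packed as
+ *
+ *	qword |= ((u64)5 << 11) & (0x7ULL << 11);
+ *
+ * placing 0b101 into bits 13:11 while guaranteeing the value cannot spill
+ * into neighboring fields. The final cpu_to_le64() produces the
+ * little-endian layout the hardware expects.
+ */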
+
+/**
+ * ice_fdir_get_prgm_desc - set a fdir descriptor from a fdir filter struct
+ * @hw: pointer to the hardware structure
+ * @input: filter
+ * @fdesc: filter descriptor
+ * @add: true for an add operation, false for a delete operation
+ */
+void
+ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
+ struct ice_fltr_desc *fdesc, bool add)
+{
+ struct ice_fd_fltr_desc_ctx fdir_fltr_ctx = { 0 };
+
+ /* set default context info */
+ ice_set_dflt_val_fd_desc(&fdir_fltr_ctx);
+
+ /* change sideband filtering values */
+ fdir_fltr_ctx.fdid = input->fltr_id;
+ if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT) {
+ fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_YES;
+ fdir_fltr_ctx.qindex = 0;
+ } else if (input->dest_ctl ==
+ ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER) {
+ fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
+ fdir_fltr_ctx.qindex = 0;
+ } else {
+ if (input->dest_ctl ==
+ ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP)
+ fdir_fltr_ctx.toq = input->q_region;
+ fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
+ fdir_fltr_ctx.qindex = input->q_index;
+ }
+ fdir_fltr_ctx.cnt_ena = input->cnt_ena;
+ fdir_fltr_ctx.cnt_index = input->cnt_index;
+ fdir_fltr_ctx.fd_vsi = ice_get_hw_vsi_num(hw, input->dest_vsi);
+ fdir_fltr_ctx.evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE;
+ if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER)
+ fdir_fltr_ctx.toq_prio = 0;
+ else
+ fdir_fltr_ctx.toq_prio = 3;
+ fdir_fltr_ctx.pcmd = add ? ICE_FXD_FLTR_QW1_PCMD_ADD :
+ ICE_FXD_FLTR_QW1_PCMD_REMOVE;
+ fdir_fltr_ctx.swap = ICE_FXD_FLTR_QW1_SWAP_NOT_SET;
+ fdir_fltr_ctx.comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO;
+ fdir_fltr_ctx.comp_report = input->comp_report;
+ fdir_fltr_ctx.fdid_prio = input->fdid_prio;
+ fdir_fltr_ctx.desc_prof = 1;
+ fdir_fltr_ctx.desc_prof_prio = 3;
+ ice_set_fd_desc_val(&fdir_fltr_ctx, fdesc);
+}
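+
+/* A minimal usage sketch (hypothetical caller, not part of this file): the
+ * same filter builds both the add and the remove descriptor.
+ *
+ *	struct ice_fltr_desc desc;
+ *
+ *	ice_fdir_get_prgm_desc(hw, input, &desc, true);
+ *
+ * With add = true the descriptor programs the filter; calling again with
+ * add = false builds the descriptor that removes it. The descriptor is then
+ * submitted via the control VSI's transmit path.
+ */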
+
+/**
+ * ice_alloc_fd_res_cntr - obtain counter resource for FD type
+ * @hw: pointer to the hardware structure
+ * @cntr_id: returns counter index
+ */
+int ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id)
+{
+ return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK,
+ ICE_AQC_RES_TYPE_FLAG_DEDICATED, 1, cntr_id);
+}
+
+/**
+ * ice_free_fd_res_cntr - Free counter resource for FD type
+ * @hw: pointer to the hardware structure
+ * @cntr_id: counter index to be freed
+ */
+int ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id)
+{
+ return ice_free_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK,
+ ICE_AQC_RES_TYPE_FLAG_DEDICATED, 1, cntr_id);
+}
+
+/**
+ * ice_alloc_fd_guar_item - allocate resource for FD guaranteed entries
+ * @hw: pointer to the hardware structure
+ * @cntr_id: returns counter index
+ * @num_fltr: number of filter entries to be allocated
+ */
+int ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr)
+{
+ return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_GUARANTEED_ENTRIES,
+ ICE_AQC_RES_TYPE_FLAG_DEDICATED, num_fltr,
+ cntr_id);
+}
+
+/**
+ * ice_alloc_fd_shrd_item - allocate resource for flow director shared entries
+ * @hw: pointer to the hardware structure
+ * @cntr_id: returns counter index
+ * @num_fltr: number of filter entries to be allocated
+ */
+int ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr)
+{
+ return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_SHARED_ENTRIES,
+ ICE_AQC_RES_TYPE_FLAG_DEDICATED, num_fltr,
+ cntr_id);
+}
+
+/**
+ * ice_get_fdir_cnt_all - get the number of Flow Director filters
+ * @hw: hardware data structure
+ *
+ * Returns the number of filters available on the device
+ */
+int ice_get_fdir_cnt_all(struct ice_hw *hw)
+{
+ return hw->func_caps.fd_fltr_guar + hw->func_caps.fd_fltr_best_effort;
+}
+
+/**
+ * ice_pkt_insert_ipv6_addr - insert an IPv6 address into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @addr: IPv6 address to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_ipv6_addr(u8 *pkt, int offset, __be32 *addr)
+{
+ int idx;
+
+ for (idx = 0; idx < ICE_IPV6_ADDR_LEN_AS_U32; idx++)
+ memcpy(pkt + offset + idx * sizeof(*addr), &addr[idx],
+ sizeof(*addr));
+}
+
+/**
+ * ice_pkt_insert_u6_qfi - insert a 6-bit QFI value into a memory buffer for GTPU
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ *
+ * This function inserts the 6-bit QFI field for GTPU, preserving the upper
+ * two bits of the target byte.
+ */
+static void ice_pkt_insert_u6_qfi(u8 *pkt, int offset, u8 data)
+{
+ u8 ret;
+
+ ret = (data & 0x3F) + (*(pkt + offset) & 0xC0);
+ memcpy(pkt + offset, &ret, sizeof(ret));
+}
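+
+/* Worked example for the merge above (bytes chosen for illustration): with
+ * data = 0x2A (QFI 42) and an existing byte of 0xC0 at pkt + offset, the
+ * result is (0x2A & 0x3F) + (0xC0 & 0xC0) = 0xEA: the low six bits carry the
+ * QFI while the top two bits of the original byte are preserved.
+ */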
+
+/**
+ * ice_pkt_insert_u8 - insert a u8 value into a memory buffer.
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_u8(u8 *pkt, int offset, u8 data)
+{
+ memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_pkt_insert_u8_tc - insert a u8 value into a memory buffer for IPv6 TC
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ *
+ * This function is designed for inserting the IPv6 Traffic Class (TC) field,
+ * which is not byte aligned. The value is split into two halves, each half is
+ * merged with the existing bits of the corresponding byte from pkt, and the
+ * two bytes are then written back one at a time.
+ */
+static void ice_pkt_insert_u8_tc(u8 *pkt, int offset, u8 data)
+{
+ u8 high, low;
+
+ high = (data >> 4) + (*(pkt + offset) & 0xF0);
+ memcpy(pkt + offset, &high, sizeof(high));
+
+ low = (*(pkt + offset + 1) & 0x0F) + ((data & 0x0F) << 4);
+ memcpy(pkt + offset + 1, &low, sizeof(low));
+}
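+
+/* Worked example (illustrative bytes): inserting data = 0xAB where the first
+ * two bytes of the IPv6 header are 0x60, 0x00. The TC high nibble shares a
+ * byte with the version field and the low nibble shares a byte with the flow
+ * label, so
+ *
+ *	high = (0xAB >> 4) + (0x60 & 0xF0) = 0x0A + 0x60 = 0x6A
+ *	low  = (0x00 & 0x0F) + ((0xAB & 0x0F) << 4) = 0xB0
+ *
+ * leaving the version (6) and the flow label bits untouched.
+ */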
+
+/**
+ * ice_pkt_insert_u16 - insert a be16 value into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 16 bit value to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_u16(u8 *pkt, int offset, __be16 data)
+{
+ memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_pkt_insert_u32 - insert a be32 value into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 32 bit value to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_u32(u8 *pkt, int offset, __be32 data)
+{
+ memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_pkt_insert_mac_addr - insert a MAC addr into a memory buffer.
+ * @pkt: packet buffer
+ * @addr: MAC address to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_mac_addr(u8 *pkt, u8 *addr)
+{
+ ether_addr_copy(pkt, addr);
+}
+
+/**
+ * ice_fdir_get_gen_prgm_pkt - generate a training packet
+ * @hw: pointer to the hardware structure
+ * @input: flow director filter data structure
+ * @pkt: pointer to return filter packet
+ * @frag: generate a fragment packet
+ * @tun: true implies generate a tunnel packet
+ */
+int
+ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
+ u8 *pkt, bool frag, bool tun)
+{
+ enum ice_fltr_ptype flow;
+ u16 tnl_port;
+ u8 *loc;
+ u16 idx;
+
+ if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
+ switch (input->ip.v4.proto) {
+ case IPPROTO_TCP:
+ flow = ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+ break;
+ case IPPROTO_UDP:
+ flow = ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+ break;
+ case IPPROTO_SCTP:
+ flow = ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+ break;
+ default:
+ flow = ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+ break;
+ }
+ } else if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) {
+ switch (input->ip.v6.proto) {
+ case IPPROTO_TCP:
+ flow = ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+ break;
+ case IPPROTO_UDP:
+ flow = ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+ break;
+ case IPPROTO_SCTP:
+ flow = ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+ break;
+ default:
+ flow = ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+ break;
+ }
+ } else {
+ flow = input->flow_type;
+ }
+
+ for (idx = 0; idx < ICE_FDIR_NUM_PKT; idx++)
+ if (ice_fdir_pkt[idx].flow == flow)
+ break;
+ if (idx == ICE_FDIR_NUM_PKT)
+ return -EINVAL;
+ if (!tun) {
+ memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len);
+ loc = pkt;
+ } else {
+ if (!ice_get_open_tunnel_port(hw, &tnl_port, TNL_ALL))
+ return -ENOENT;
+ if (!ice_fdir_pkt[idx].tun_pkt)
+ return -EINVAL;
+ memcpy(pkt, ice_fdir_pkt[idx].tun_pkt,
+ ice_fdir_pkt[idx].tun_pkt_len);
+ ice_pkt_insert_u16(pkt, ICE_IPV4_UDP_DST_PORT_OFFSET,
+ htons(tnl_port));
+ loc = &pkt[ICE_FDIR_TUN_PKT_OFF];
+ }
+
+	/* Reverse the src and dst, since the HW expects them from the Tx
+	 * perspective while the user input is from the Rx filter perspective.
+	 */
+ switch (flow) {
+ case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+ ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+ input->ip.v4.src_ip);
+ ice_pkt_insert_u16(loc, ICE_IPV4_TCP_DST_PORT_OFFSET,
+ input->ip.v4.src_port);
+ ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+ input->ip.v4.dst_ip);
+ ice_pkt_insert_u16(loc, ICE_IPV4_TCP_SRC_PORT_OFFSET,
+ input->ip.v4.dst_port);
+ ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+ ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+ ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+ if (frag)
+ loc[20] = ICE_FDIR_IPV4_PKT_FLAG_MF;
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+ ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+ input->ip.v4.src_ip);
+ ice_pkt_insert_u16(loc, ICE_IPV4_UDP_DST_PORT_OFFSET,
+ input->ip.v4.src_port);
+ ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+ input->ip.v4.dst_ip);
+ ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
+ input->ip.v4.dst_port);
+ ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+ ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+ ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+ ice_pkt_insert_mac_addr(loc + ETH_ALEN,
+ input->ext_data.src_mac);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+ ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+ input->ip.v4.src_ip);
+ ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_DST_PORT_OFFSET,
+ input->ip.v4.src_port);
+ ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+ input->ip.v4.dst_ip);
+ ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_SRC_PORT_OFFSET,
+ input->ip.v4.dst_port);
+ ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+ ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+ ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+ ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+ input->ip.v4.src_ip);
+ ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+ input->ip.v4.dst_ip);
+ ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+ ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+ ice_pkt_insert_u8(loc, ICE_IPV4_PROTO_OFFSET,
+ input->ip.v4.proto);
+ ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP:
+ case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP:
+ case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP:
+ case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER:
+ ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+ input->ip.v4.src_ip);
+ ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+ input->ip.v4.dst_ip);
+ ice_pkt_insert_u32(loc, ICE_IPV4_GTPU_TEID_OFFSET,
+ input->gtpu_data.teid);
+ ice_pkt_insert_u6_qfi(loc, ICE_IPV4_GTPU_QFI_OFFSET,
+ input->gtpu_data.qfi);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3:
+ ice_pkt_insert_u32(loc, ICE_IPV4_L2TPV3_SESS_ID_OFFSET,
+ input->l2tpv3_data.session_id);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3:
+ ice_pkt_insert_u32(loc, ICE_IPV6_L2TPV3_SESS_ID_OFFSET,
+ input->l2tpv3_data.session_id);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_ESP:
+ ice_pkt_insert_u32(loc, ICE_IPV4_ESP_SPI_OFFSET,
+ input->ip.v4.sec_parm_idx);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_ESP:
+ ice_pkt_insert_u32(loc, ICE_IPV6_ESP_SPI_OFFSET,
+ input->ip.v6.sec_parm_idx);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_AH:
+ ice_pkt_insert_u32(loc, ICE_IPV4_AH_SPI_OFFSET,
+ input->ip.v4.sec_parm_idx);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_AH:
+ ice_pkt_insert_u32(loc, ICE_IPV6_AH_SPI_OFFSET,
+ input->ip.v6.sec_parm_idx);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP:
+ ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+ input->ip.v4.src_ip);
+ ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+ input->ip.v4.dst_ip);
+ ice_pkt_insert_u32(loc, ICE_IPV4_NAT_T_ESP_SPI_OFFSET,
+ input->ip.v4.sec_parm_idx);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP:
+ ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+ input->ip.v6.src_ip);
+ ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+ input->ip.v6.dst_ip);
+ ice_pkt_insert_u32(loc, ICE_IPV6_NAT_T_ESP_SPI_OFFSET,
+ input->ip.v6.sec_parm_idx);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE:
+ case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION:
+ ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
+ input->ip.v4.dst_port);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE:
+ case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION:
+ ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
+ input->ip.v6.dst_port);
+ break;
+ case ICE_FLTR_PTYPE_NON_IP_L2:
+ ice_pkt_insert_u16(loc, ICE_MAC_ETHTYPE_OFFSET,
+ input->ext_data.ether_type);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+ ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+ input->ip.v6.src_ip);
+ ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+ input->ip.v6.dst_ip);
+ ice_pkt_insert_u16(loc, ICE_IPV6_TCP_DST_PORT_OFFSET,
+ input->ip.v6.src_port);
+ ice_pkt_insert_u16(loc, ICE_IPV6_TCP_SRC_PORT_OFFSET,
+ input->ip.v6.dst_port);
+ ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+ ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+ ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+ ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+ input->ip.v6.src_ip);
+ ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+ input->ip.v6.dst_ip);
+ ice_pkt_insert_u16(loc, ICE_IPV6_UDP_DST_PORT_OFFSET,
+ input->ip.v6.src_port);
+ ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
+ input->ip.v6.dst_port);
+ ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+ ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+ ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+ ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+ input->ip.v6.src_ip);
+ ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+ input->ip.v6.dst_ip);
+ ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_DST_PORT_OFFSET,
+ input->ip.v6.src_port);
+ ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_SRC_PORT_OFFSET,
+ input->ip.v6.dst_port);
+ ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+ ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+ ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+ ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+ input->ip.v6.src_ip);
+ ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+ input->ip.v6.dst_ip);
+ ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+ ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+ ice_pkt_insert_u8(loc, ICE_IPV6_PROTO_OFFSET,
+ input->ip.v6.proto);
+ ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (input->flex_fltr)
+ ice_pkt_insert_u16(loc, input->flex_offset, input->flex_word);
+
+ return 0;
+}
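+
+/* A minimal usage sketch (assumed caller, for illustration only): the buffer
+ * must hold the largest training packet plus the tunnel offset, which is
+ * exactly what ICE_FDIR_MAX_RAW_PKT_SIZE provides.
+ *
+ *	u8 *pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+ *
+ *	if (pkt && !ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, false))
+ *		...
+ *
+ * On success the generated packet is transmitted on the control VSI to
+ * program the filter in hardware.
+ */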
+
+/**
+ * ice_fdir_has_frag - check whether a flow type has a fragment ptype
+ * @flow: flow ptype
+ *
+ * Returns true if there is a fragment packet for this ptype
+ */
+bool ice_fdir_has_frag(enum ice_fltr_ptype flow)
+{
+	return flow == ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+}
+
+/**
+ * ice_fdir_find_fltr_by_idx - find filter with idx
+ * @hw: pointer to hardware structure
+ * @fltr_idx: index to find
+ *
+ * Returns a pointer to the filter if found, otherwise NULL
+ */
+struct ice_fdir_fltr *
+ice_fdir_find_fltr_by_idx(struct ice_hw *hw, u32 fltr_idx)
+{
+ struct ice_fdir_fltr *rule;
+
+ list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+ /* rule ID found in the list */
+ if (fltr_idx == rule->fltr_id)
+ return rule;
+ if (fltr_idx < rule->fltr_id)
+ break;
+ }
+ return NULL;
+}
+
+/**
+ * ice_fdir_list_add_fltr - add a new node to the flow director filter list
+ * @hw: hardware structure
+ * @fltr: filter node to add to structure
+ */
+void ice_fdir_list_add_fltr(struct ice_hw *hw, struct ice_fdir_fltr *fltr)
+{
+ struct ice_fdir_fltr *rule, *parent = NULL;
+
+ list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+		/* found the rule ID or passed its spot in the list */
+ if (rule->fltr_id >= fltr->fltr_id)
+ break;
+ parent = rule;
+ }
+
+ if (parent)
+ list_add(&fltr->fltr_node, &parent->fltr_node);
+ else
+ list_add(&fltr->fltr_node, &hw->fdir_list_head);
+}
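+
+/* Example of the ordering above (illustrative IDs): inserting fltr_id 5 into
+ * a list holding IDs 1, 3 and 8 walks past 1 and 3 (parent = 3), stops at 8,
+ * and links the new node after 3, keeping the list sorted by fltr_id. This
+ * ordering is what lets ice_fdir_find_fltr_by_idx() stop searching as soon
+ * as fltr_idx < rule->fltr_id.
+ */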
+
+/**
+ * ice_fdir_update_cntrs - increment / decrement filter counter
+ * @hw: pointer to hardware structure
+ * @flow: filter flow type
+ * @add: true if a filter is being added, false if it is being removed
+ */
+void
+ice_fdir_update_cntrs(struct ice_hw *hw, enum ice_fltr_ptype flow, bool add)
+{
+ int incr;
+
+ incr = add ? 1 : -1;
+ hw->fdir_active_fltr += incr;
+
+ if (flow == ICE_FLTR_PTYPE_NONF_NONE || flow >= ICE_FLTR_PTYPE_MAX)
+ ice_debug(hw, ICE_DBG_SW, "Unknown filter type %d\n", flow);
+ else
+ hw->fdir_fltr_cnt[flow] += incr;
+}
+
+/**
+ * ice_cmp_ipv6_addr - compare 2 IPv6 addresses
+ * @a: IPv6 address
+ * @b: IPv6 address
+ *
+ * Returns 0 if equal, non-zero otherwise
+ */
+static int ice_cmp_ipv6_addr(__be32 *a, __be32 *b)
+{
+ return memcmp(a, b, 4 * sizeof(__be32));
+}
+
+/**
+ * ice_fdir_comp_rules - compare 2 filters
+ * @a: a Flow Director filter data structure
+ * @b: a Flow Director filter data structure
+ * @v6: bool true if v6 filter
+ *
+ * Returns true if the filters match
+ */
+static bool
+ice_fdir_comp_rules(struct ice_fdir_fltr *a, struct ice_fdir_fltr *b, bool v6)
+{
+ enum ice_fltr_ptype flow_type = a->flow_type;
+
+ /* The calling function already checks that the two filters have the
+ * same flow_type.
+ */
+ if (!v6) {
+ if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP) {
+ if (a->ip.v4.dst_ip == b->ip.v4.dst_ip &&
+ a->ip.v4.src_ip == b->ip.v4.src_ip &&
+ a->ip.v4.dst_port == b->ip.v4.dst_port &&
+ a->ip.v4.src_port == b->ip.v4.src_port)
+ return true;
+ } else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
+ if (a->ip.v4.dst_ip == b->ip.v4.dst_ip &&
+ a->ip.v4.src_ip == b->ip.v4.src_ip &&
+ a->ip.v4.l4_header == b->ip.v4.l4_header &&
+ a->ip.v4.proto == b->ip.v4.proto &&
+ a->ip.v4.ip_ver == b->ip.v4.ip_ver &&
+ a->ip.v4.tos == b->ip.v4.tos)
+ return true;
+ }
+ } else {
+ if (flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV6_SCTP) {
+ if (a->ip.v6.dst_port == b->ip.v6.dst_port &&
+ a->ip.v6.src_port == b->ip.v6.src_port &&
+ !ice_cmp_ipv6_addr(a->ip.v6.dst_ip,
+ b->ip.v6.dst_ip) &&
+ !ice_cmp_ipv6_addr(a->ip.v6.src_ip,
+ b->ip.v6.src_ip))
+ return true;
+ } else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) {
+ if (a->ip.v6.dst_port == b->ip.v6.dst_port &&
+ a->ip.v6.src_port == b->ip.v6.src_port)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * ice_fdir_is_dup_fltr - test if filter is already in list for PF
+ * @hw: hardware data structure
+ * @input: Flow Director filter data structure
+ *
+ * Returns true if the filter is found in the list
+ */
+bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input)
+{
+ struct ice_fdir_fltr *rule;
+ bool ret = false;
+
+ list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+ enum ice_fltr_ptype flow_type;
+
+ if (rule->flow_type != input->flow_type)
+ continue;
+
+ flow_type = input->flow_type;
+ if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP ||
+ flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
+ ret = ice_fdir_comp_rules(rule, input, false);
+ else
+ ret = ice_fdir_comp_rules(rule, input, true);
+ if (ret) {
+ if (rule->fltr_id == input->fltr_id &&
+ rule->q_index != input->q_index)
+ ret = false;
+ else
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
new file mode 100644
index 000000000..1b9b84490
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_FDIR_H_
+#define _ICE_FDIR_H_
+
+#define ICE_FDIR_TUN_PKT_OFF 50
+#define ICE_FDIR_MAX_RAW_PKT_SIZE (512 + ICE_FDIR_TUN_PKT_OFF)
+
+/* macros for offsets into packets for flow director programming */
+#define ICE_IPV4_SRC_ADDR_OFFSET 26
+#define ICE_IPV4_DST_ADDR_OFFSET 30
+#define ICE_IPV4_TCP_SRC_PORT_OFFSET 34
+#define ICE_IPV4_TCP_DST_PORT_OFFSET 36
+#define ICE_IPV4_UDP_SRC_PORT_OFFSET 34
+#define ICE_IPV4_UDP_DST_PORT_OFFSET 36
+#define ICE_IPV4_SCTP_SRC_PORT_OFFSET 34
+#define ICE_IPV4_SCTP_DST_PORT_OFFSET 36
+#define ICE_IPV4_PROTO_OFFSET 23
+#define ICE_IPV6_SRC_ADDR_OFFSET 22
+#define ICE_IPV6_DST_ADDR_OFFSET 38
+#define ICE_IPV6_TCP_SRC_PORT_OFFSET 54
+#define ICE_IPV6_TCP_DST_PORT_OFFSET 56
+#define ICE_IPV6_UDP_SRC_PORT_OFFSET 54
+#define ICE_IPV6_UDP_DST_PORT_OFFSET 56
+#define ICE_IPV6_SCTP_SRC_PORT_OFFSET 54
+#define ICE_IPV6_SCTP_DST_PORT_OFFSET 56
+#define ICE_MAC_ETHTYPE_OFFSET 12
+#define ICE_IPV4_TOS_OFFSET 15
+#define ICE_IPV4_TTL_OFFSET 22
+#define ICE_IPV6_TC_OFFSET 14
+#define ICE_IPV6_HLIM_OFFSET 21
+#define ICE_IPV6_PROTO_OFFSET 20
+#define ICE_IPV4_GTPU_TEID_OFFSET 46
+#define ICE_IPV4_GTPU_QFI_OFFSET 56
+#define ICE_IPV4_L2TPV3_SESS_ID_OFFSET 34
+#define ICE_IPV6_L2TPV3_SESS_ID_OFFSET 54
+#define ICE_IPV4_ESP_SPI_OFFSET 34
+#define ICE_IPV6_ESP_SPI_OFFSET 54
+#define ICE_IPV4_AH_SPI_OFFSET 38
+#define ICE_IPV6_AH_SPI_OFFSET 58
+#define ICE_IPV4_NAT_T_ESP_SPI_OFFSET 42
+#define ICE_IPV6_NAT_T_ESP_SPI_OFFSET 62
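+
+/* As a sanity check on the values above (derivable from the training
+ * packets): offsets are measured from the start of the frame, so with a
+ * 14-byte Ethernet header the IPv4 source address lands at 14 + 12 = 26 and
+ * the destination at 14 + 16 = 30; the IPv6 source address lands at
+ * 14 + 8 = 22 and the destination at 14 + 24 = 38; the L4 ports follow a
+ * 20-byte IPv4 header at 34/36 or a 40-byte IPv6 header at 54/56.
+ */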
+
+#define ICE_FDIR_MAX_FLTRS 16384
+
+/* IPv4 has 2 flag bits that control fragment processing: DF and MF. DF
+ * requests that the packet not be fragmented. MF indicates that more
+ * fragments follow the current one.
+ */
+#define ICE_FDIR_IPV4_PKT_FLAG_MF 0x20
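+
+/* In the training packets the IPv4 flags byte therefore sits at frame offset
+ * 14 + 6 = 20, which is why ice_fdir_get_gen_prgm_pkt() marks a fragment by
+ * writing this value to loc[20]; 0x20 is bit 5 of that byte, the MF
+ * position.
+ */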
+
+enum ice_fltr_prgm_desc_dest {
+ ICE_FLTR_PRGM_DESC_DEST_DROP_PKT,
+ ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX,
+ ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP,
+ ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER,
+};
+
+enum ice_fltr_prgm_desc_fd_status {
+ ICE_FLTR_PRGM_DESC_FD_STATUS_NONE,
+ ICE_FLTR_PRGM_DESC_FD_STATUS_FD_ID,
+};
+
+/* Flow Director (FD) Filter Programming descriptor */
+struct ice_fd_fltr_desc_ctx {
+ u32 fdid;
+ u16 qindex;
+ u16 cnt_index;
+ u16 fd_vsi;
+ u16 flex_val;
+ u8 comp_q;
+ u8 comp_report;
+ u8 fd_space;
+ u8 cnt_ena;
+ u8 evict_ena;
+ u8 toq;
+ u8 toq_prio;
+ u8 dpu_recipe;
+ u8 drop;
+ u8 flex_prio;
+ u8 flex_mdid;
+ u8 dtype;
+ u8 pcmd;
+ u8 desc_prof_prio;
+ u8 desc_prof;
+ u8 swap;
+ u8 fdid_prio;
+ u8 fdid_mdid;
+};
+
+#define ICE_FLTR_PRGM_FLEX_WORD_SIZE sizeof(__be16)
+
+struct ice_rx_flow_userdef {
+ u16 flex_word;
+ u16 flex_offset;
+ u16 flex_fltr;
+};
+
+struct ice_fdir_v4 {
+ __be32 dst_ip;
+ __be32 src_ip;
+ __be16 dst_port;
+ __be16 src_port;
+ __be32 l4_header;
+ __be32 sec_parm_idx; /* security parameter index */
+ u8 tos;
+ u8 ip_ver;
+ u8 proto;
+ u8 ttl;
+};
+
+#define ICE_IPV6_ADDR_LEN_AS_U32 4
+
+struct ice_fdir_v6 {
+ __be32 dst_ip[ICE_IPV6_ADDR_LEN_AS_U32];
+ __be32 src_ip[ICE_IPV6_ADDR_LEN_AS_U32];
+ __be16 dst_port;
+ __be16 src_port;
+ __be32 l4_header; /* next header */
+ __be32 sec_parm_idx; /* security parameter index */
+ u8 tc;
+ u8 proto;
+ u8 hlim;
+};
+
+struct ice_fdir_udp_gtp {
+ u8 flags;
+ u8 msg_type;
+ __be16 rsrvd_len;
+ __be32 teid;
+ __be16 rsrvd_seq_nbr;
+ u8 rsrvd_n_pdu_nbr;
+ u8 rsrvd_next_ext_type;
+ u8 rsvrd_ext_len;
+ u8 pdu_type:4,
+ spare:4;
+ u8 ppp:1,
+ rqi:1,
+ qfi:6;
+ u32 rsvrd;
+ u8 next_ext;
+};
+
+struct ice_fdir_l2tpv3 {
+ __be32 session_id;
+};
+
+struct ice_fdir_extra {
+ u8 dst_mac[ETH_ALEN]; /* dest MAC address */
+ u8 src_mac[ETH_ALEN]; /* src MAC address */
+ __be16 ether_type; /* for NON_IP_L2 */
+ u32 usr_def[2]; /* user data */
+ __be16 vlan_type; /* VLAN ethertype */
+ __be16 vlan_tag; /* VLAN tag info */
+};
+
+struct ice_fdir_fltr {
+ struct list_head fltr_node;
+ enum ice_fltr_ptype flow_type;
+
+ union {
+ struct ice_fdir_v4 v4;
+ struct ice_fdir_v6 v6;
+ } ip, mask;
+
+ struct ice_fdir_udp_gtp gtpu_data;
+ struct ice_fdir_udp_gtp gtpu_mask;
+
+ struct ice_fdir_l2tpv3 l2tpv3_data;
+ struct ice_fdir_l2tpv3 l2tpv3_mask;
+
+ struct ice_fdir_extra ext_data;
+ struct ice_fdir_extra ext_mask;
+
+ /* flex byte filter data */
+ __be16 flex_word;
+ /* queue region size (=2^q_region) */
+ u8 q_region;
+ u16 flex_offset;
+ u16 flex_fltr;
+
+ /* filter control */
+ u16 q_index;
+ u16 orig_q_index;
+ u16 dest_vsi;
+ u8 dest_ctl;
+ u8 cnt_ena;
+ u8 fltr_status;
+ u16 cnt_index;
+ u32 fltr_id;
+ u8 fdid_prio;
+ u8 comp_report;
+};
+
+/* Dummy packet filter definition structure */
+struct ice_fdir_base_pkt {
+ enum ice_fltr_ptype flow;
+ u16 pkt_len;
+ const u8 *pkt;
+ u16 tun_pkt_len;
+ const u8 *tun_pkt;
+};
+
+int ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id);
+int ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id);
+int ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
+int ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
+void
+ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
+ struct ice_fltr_desc *fdesc, bool add);
+int
+ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
+ u8 *pkt, bool frag, bool tun);
+int ice_get_fdir_cnt_all(struct ice_hw *hw);
+bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input);
+bool ice_fdir_has_frag(enum ice_fltr_ptype flow);
+struct ice_fdir_fltr *
+ice_fdir_find_fltr_by_idx(struct ice_hw *hw, u32 fltr_idx);
+void
+ice_fdir_update_cntrs(struct ice_hw *hw, enum ice_fltr_ptype flow, bool add);
+void ice_fdir_list_add_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input);
+#endif /* _ICE_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
new file mode 100644
index 000000000..4b3bb19e1
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -0,0 +1,6129 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_flex_pipe.h"
+#include "ice_flow.h"
+#include "ice.h"
+
+/* To support double VLAN mode, it is necessary to enable or disable certain
+ * boost TCAM entries. The metadata label names that match the following
+ * prefixes will be saved to allow enabling double VLAN mode.
+ */
+#define ICE_DVM_PRE "BOOST_MAC_VLAN_DVM" /* enable these entries */
+#define ICE_SVM_PRE "BOOST_MAC_VLAN_SVM" /* disable these entries */
+
+/* To support tunneling entries by PF, the package will append the PF number to
+ * the label; for example TNL_VXLAN_PF0, TNL_VXLAN_PF1, TNL_VXLAN_PF2, etc.
+ */
+#define ICE_TNL_PRE "TNL_"
+static const struct ice_tunnel_type_scan tnls[] = {
+ { TNL_VXLAN, "TNL_VXLAN_PF" },
+ { TNL_GENEVE, "TNL_GENEVE_PF" },
+ { TNL_LAST, "" }
+};
+
+static const u32 ice_sect_lkup[ICE_BLK_COUNT][ICE_SECT_COUNT] = {
+ /* SWITCH */
+ {
+ ICE_SID_XLT0_SW,
+ ICE_SID_XLT_KEY_BUILDER_SW,
+ ICE_SID_XLT1_SW,
+ ICE_SID_XLT2_SW,
+ ICE_SID_PROFID_TCAM_SW,
+ ICE_SID_PROFID_REDIR_SW,
+ ICE_SID_FLD_VEC_SW,
+ ICE_SID_CDID_KEY_BUILDER_SW,
+ ICE_SID_CDID_REDIR_SW
+ },
+
+ /* ACL */
+ {
+ ICE_SID_XLT0_ACL,
+ ICE_SID_XLT_KEY_BUILDER_ACL,
+ ICE_SID_XLT1_ACL,
+ ICE_SID_XLT2_ACL,
+ ICE_SID_PROFID_TCAM_ACL,
+ ICE_SID_PROFID_REDIR_ACL,
+ ICE_SID_FLD_VEC_ACL,
+ ICE_SID_CDID_KEY_BUILDER_ACL,
+ ICE_SID_CDID_REDIR_ACL
+ },
+
+ /* FD */
+ {
+ ICE_SID_XLT0_FD,
+ ICE_SID_XLT_KEY_BUILDER_FD,
+ ICE_SID_XLT1_FD,
+ ICE_SID_XLT2_FD,
+ ICE_SID_PROFID_TCAM_FD,
+ ICE_SID_PROFID_REDIR_FD,
+ ICE_SID_FLD_VEC_FD,
+ ICE_SID_CDID_KEY_BUILDER_FD,
+ ICE_SID_CDID_REDIR_FD
+ },
+
+ /* RSS */
+ {
+ ICE_SID_XLT0_RSS,
+ ICE_SID_XLT_KEY_BUILDER_RSS,
+ ICE_SID_XLT1_RSS,
+ ICE_SID_XLT2_RSS,
+ ICE_SID_PROFID_TCAM_RSS,
+ ICE_SID_PROFID_REDIR_RSS,
+ ICE_SID_FLD_VEC_RSS,
+ ICE_SID_CDID_KEY_BUILDER_RSS,
+ ICE_SID_CDID_REDIR_RSS
+ },
+
+ /* PE */
+ {
+ ICE_SID_XLT0_PE,
+ ICE_SID_XLT_KEY_BUILDER_PE,
+ ICE_SID_XLT1_PE,
+ ICE_SID_XLT2_PE,
+ ICE_SID_PROFID_TCAM_PE,
+ ICE_SID_PROFID_REDIR_PE,
+ ICE_SID_FLD_VEC_PE,
+ ICE_SID_CDID_KEY_BUILDER_PE,
+ ICE_SID_CDID_REDIR_PE
+ }
+};
+
+/**
+ * ice_sect_id - returns section ID
+ * @blk: block type
+ * @sect: section type
+ *
+ * This helper function returns the proper section ID given a block type and a
+ * section type.
+ */
+static u32 ice_sect_id(enum ice_block blk, enum ice_sect sect)
+{
+ return ice_sect_lkup[blk][sect];
+}
+
+/**
+ * ice_pkg_val_buf - validate a package buffer
+ * @buf: pointer to the ice buffer
+ *
+ * This helper function validates a buffer's header.
+ */
+static struct ice_buf_hdr *ice_pkg_val_buf(struct ice_buf *buf)
+{
+ struct ice_buf_hdr *hdr;
+ u16 section_count;
+ u16 data_end;
+
+ hdr = (struct ice_buf_hdr *)buf->buf;
+ /* verify data */
+ section_count = le16_to_cpu(hdr->section_count);
+ if (section_count < ICE_MIN_S_COUNT || section_count > ICE_MAX_S_COUNT)
+ return NULL;
+
+ data_end = le16_to_cpu(hdr->data_end);
+ if (data_end < ICE_MIN_S_DATA_END || data_end > ICE_MAX_S_DATA_END)
+ return NULL;
+
+ return hdr;
+}
+
+/**
+ * ice_find_buf_table - find the buffer table within an ice segment
+ * @ice_seg: pointer to the ice segment
+ *
+ * Returns the address of the buffer table within the ice segment.
+ */
+static struct ice_buf_table *ice_find_buf_table(struct ice_seg *ice_seg)
+{
+ struct ice_nvm_table *nvms;
+
+ nvms = (struct ice_nvm_table *)
+ (ice_seg->device_table +
+ le32_to_cpu(ice_seg->device_table_count));
+
+ return (__force struct ice_buf_table *)
+ (nvms->vers + le32_to_cpu(nvms->table_count));
+}
+
+/**
+ * ice_pkg_enum_buf - enumerate buffers within the ice segment
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ *
+ * This function will enumerate all the buffers in the ice segment. The first
+ * call is made with the ice_seg parameter non-NULL; on subsequent calls,
+ * ice_seg is set to NULL which continues the enumeration. When the function
+ * returns a NULL pointer, then the end of the buffers has been reached, or an
+ * unexpected value has been detected (for example an invalid section count or
+ * an invalid buffer end value).
+ */
+static struct ice_buf_hdr *
+ice_pkg_enum_buf(struct ice_seg *ice_seg, struct ice_pkg_enum *state)
+{
+ if (ice_seg) {
+ state->buf_table = ice_find_buf_table(ice_seg);
+ if (!state->buf_table)
+ return NULL;
+
+ state->buf_idx = 0;
+ return ice_pkg_val_buf(state->buf_table->buf_array);
+ }
+
+	if (++state->buf_idx < le32_to_cpu(state->buf_table->buf_count))
+		return ice_pkg_val_buf(state->buf_table->buf_array +
+				       state->buf_idx);
+
+	return NULL;
+}
+
+/**
+ * ice_pkg_advance_sect - advance to the next section of the ice segment
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ *
+ * This helper function will advance the section within the ice segment,
+ * also advancing the buffer if needed.
+ */
+static bool
+ice_pkg_advance_sect(struct ice_seg *ice_seg, struct ice_pkg_enum *state)
+{
+ if (!ice_seg && !state->buf)
+ return false;
+
+ if (!ice_seg && state->buf)
+ if (++state->sect_idx < le16_to_cpu(state->buf->section_count))
+ return true;
+
+ state->buf = ice_pkg_enum_buf(ice_seg, state);
+ if (!state->buf)
+ return false;
+
+ /* start of new buffer, reset section index */
+ state->sect_idx = 0;
+ return true;
+}
+
+/**
+ * ice_pkg_enum_section - enumerate sections of a given type within the segment
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ * @sect_type: section type to enumerate
+ *
+ * This function will enumerate all the sections of a particular type in the
+ * ice segment. The first call is made with the ice_seg parameter non-NULL;
+ * on subsequent calls, ice_seg is set to NULL which continues the enumeration.
+ * When the function returns a NULL pointer, then the end of the matching
+ * sections has been reached.
+ */
+static void *
+ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
+ u32 sect_type)
+{
+ u16 offset, size;
+
+ if (ice_seg)
+ state->type = sect_type;
+
+ if (!ice_pkg_advance_sect(ice_seg, state))
+ return NULL;
+
+ /* scan for next matching section */
+ while (state->buf->section_entry[state->sect_idx].type !=
+ cpu_to_le32(state->type))
+ if (!ice_pkg_advance_sect(NULL, state))
+ return NULL;
+
+ /* validate section */
+ offset = le16_to_cpu(state->buf->section_entry[state->sect_idx].offset);
+ if (offset < ICE_MIN_S_OFF || offset > ICE_MAX_S_OFF)
+ return NULL;
+
+ size = le16_to_cpu(state->buf->section_entry[state->sect_idx].size);
+ if (size < ICE_MIN_S_SZ || size > ICE_MAX_S_SZ)
+ return NULL;
+
+ /* make sure the section fits in the buffer */
+ if (offset + size > ICE_PKG_BUF_SIZE)
+ return NULL;
+
+ state->sect_type =
+ le32_to_cpu(state->buf->section_entry[state->sect_idx].type);
+
+ /* calc pointer to this section */
+ state->sect = ((u8 *)state->buf) +
+ le16_to_cpu(state->buf->section_entry[state->sect_idx].offset);
+
+ return state->sect;
+}
+
+/**
+ * ice_pkg_enum_entry - enumerate entries within a given section type
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ * @sect_type: section type to enumerate
+ * @offset: pointer to variable that receives the offset in the table (optional)
+ * @handler: function that handles access to the entries into the section type
+ *
+ * This function will enumerate all the entries of a particular section type in
+ * the ice segment. The first call is made with the ice_seg parameter non-NULL;
+ * on subsequent calls, ice_seg is set to NULL which continues the enumeration.
+ * When the function returns a NULL pointer, then the end of the entries has
+ * been reached.
+ *
+ * Since each section may have a different header and entry size, the handler
+ * function is needed to determine the number and location entries in each
+ * section.
+ *
+ * The offset parameter is optional, but should be used for sections that
+ * contain an offset for each section table. For such cases, the section handler
+ * function must return the appropriate offset + index to give the absolution
+ * offset for each entry. For example, if the base for a section's header
+ * indicates a base offset of 10, and the index for the entry is 2, then
+ * section handler function should set the offset to 10 + 2 = 12.
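+ *
+ * A minimal handler sketch for such a section (illustrative only; the struct
+ * and field names here are placeholders, not taken from this driver):
+ *
+ *	static void *my_handler(u32 sect_type, void *section, u32 index,
+ *				u32 *offset)
+ *	{
+ *		struct my_section *s = section;
+ *
+ *		if (!s || index >= le16_to_cpu(s->count))
+ *			return NULL;
+ *		if (offset)
+ *			*offset = le16_to_cpu(s->base_offset) + index;
+ *		return s->entry + index;
+ *	}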
+ */
+static void *
+ice_pkg_enum_entry(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
+ u32 sect_type, u32 *offset,
+ void *(*handler)(u32 sect_type, void *section,
+ u32 index, u32 *offset))
+{
+ void *entry;
+
+ if (ice_seg) {
+ if (!handler)
+ return NULL;
+
+ if (!ice_pkg_enum_section(ice_seg, state, sect_type))
+ return NULL;
+
+ state->entry_idx = 0;
+ state->handler = handler;
+ } else {
+ state->entry_idx++;
+ }
+
+ if (!state->handler)
+ return NULL;
+
+ /* get entry */
+ entry = state->handler(state->sect_type, state->sect, state->entry_idx,
+ offset);
+ if (!entry) {
+ /* end of a section, look for another section of this type */
+ if (!ice_pkg_enum_section(NULL, state, 0))
+ return NULL;
+
+ state->entry_idx = 0;
+ entry = state->handler(state->sect_type, state->sect,
+ state->entry_idx, offset);
+ }
+
+ return entry;
+}
+
+/**
+ * ice_hw_ptype_ena - check if the PTYPE is enabled or not
+ * @hw: pointer to the HW structure
+ * @ptype: the hardware PTYPE
+ */
+bool ice_hw_ptype_ena(struct ice_hw *hw, u16 ptype)
+{
+ return ptype < ICE_FLOW_PTYPE_MAX &&
+ test_bit(ptype, hw->hw_ptype);
+}
+
+/**
+ * ice_marker_ptype_tcam_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the Marker PType TCAM entry to be returned
+ * @offset: pointer to receive absolute offset, always 0 for ptype TCAM sections
+ *
+ * This is a callback function that can be passed to ice_pkg_enum_entry.
+ * Handles enumeration of individual Marker PType TCAM entries.
+ */
+static void *
+ice_marker_ptype_tcam_handler(u32 sect_type, void *section, u32 index,
+ u32 *offset)
+{
+ struct ice_marker_ptype_tcam_section *marker_ptype;
+
+ if (sect_type != ICE_SID_RXPARSER_MARKER_PTYPE)
+ return NULL;
+
+ if (index > ICE_MAX_MARKER_PTYPE_TCAMS_IN_BUF)
+ return NULL;
+
+ if (offset)
+ *offset = 0;
+
+ marker_ptype = section;
+ if (index >= le16_to_cpu(marker_ptype->count))
+ return NULL;
+
+ return marker_ptype->tcam + index;
+}
+
+/**
+ * ice_fill_hw_ptype - fill the enabled PTYPE bit information
+ * @hw: pointer to the HW structure
+ */
+static void ice_fill_hw_ptype(struct ice_hw *hw)
+{
+ struct ice_marker_ptype_tcam_entry *tcam;
+ struct ice_seg *seg = hw->seg;
+ struct ice_pkg_enum state;
+
+ bitmap_zero(hw->hw_ptype, ICE_FLOW_PTYPE_MAX);
+ if (!seg)
+ return;
+
+ memset(&state, 0, sizeof(state));
+
+ do {
+ tcam = ice_pkg_enum_entry(seg, &state,
+ ICE_SID_RXPARSER_MARKER_PTYPE, NULL,
+ ice_marker_ptype_tcam_handler);
+ if (tcam &&
+ le16_to_cpu(tcam->addr) < ICE_MARKER_PTYPE_TCAM_ADDR_MAX &&
+ le16_to_cpu(tcam->ptype) < ICE_FLOW_PTYPE_MAX)
+ set_bit(le16_to_cpu(tcam->ptype), hw->hw_ptype);
+
+ seg = NULL;
+ } while (tcam);
+}
+
+/**
+ * ice_boost_tcam_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the boost TCAM entry to be returned
+ * @offset: pointer to receive absolute offset, always 0 for boost TCAM sections
+ *
+ * This is a callback function that can be passed to ice_pkg_enum_entry.
+ * Handles enumeration of individual boost TCAM entries.
+ */
+static void *
+ice_boost_tcam_handler(u32 sect_type, void *section, u32 index, u32 *offset)
+{
+ struct ice_boost_tcam_section *boost;
+
+ if (!section)
+ return NULL;
+
+ if (sect_type != ICE_SID_RXPARSER_BOOST_TCAM)
+ return NULL;
+
+ /* cppcheck-suppress nullPointer */
+ if (index > ICE_MAX_BST_TCAMS_IN_BUF)
+ return NULL;
+
+ if (offset)
+ *offset = 0;
+
+ boost = section;
+ if (index >= le16_to_cpu(boost->count))
+ return NULL;
+
+ return boost->tcam + index;
+}
+
+/**
+ * ice_find_boost_entry
+ * @ice_seg: pointer to the ice segment (non-NULL)
+ * @addr: Boost TCAM address of entry to search for
+ * @entry: returns pointer to the entry
+ *
+ * Finds a particular Boost TCAM entry and returns a pointer to that entry
+ * if it is found. The ice_seg parameter must not be NULL since the first call
+ * to ice_pkg_enum_entry requires a pointer to an actual ice_seg structure.
+ */
+static int
+ice_find_boost_entry(struct ice_seg *ice_seg, u16 addr,
+ struct ice_boost_tcam_entry **entry)
+{
+ struct ice_boost_tcam_entry *tcam;
+ struct ice_pkg_enum state;
+
+ memset(&state, 0, sizeof(state));
+
+ if (!ice_seg)
+ return -EINVAL;
+
+ do {
+ tcam = ice_pkg_enum_entry(ice_seg, &state,
+ ICE_SID_RXPARSER_BOOST_TCAM, NULL,
+ ice_boost_tcam_handler);
+ if (tcam && le16_to_cpu(tcam->addr) == addr) {
+ *entry = tcam;
+ return 0;
+ }
+
+ ice_seg = NULL;
+ } while (tcam);
+
+ *entry = NULL;
+ return -EIO;
+}
+
+/**
+ * ice_label_enum_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the label entry to be returned
+ * @offset: pointer to receive absolute offset, always zero for label sections
+ *
+ * This is a callback function that can be passed to ice_pkg_enum_entry.
+ * Handles enumeration of individual label entries.
+ */
+static void *
+ice_label_enum_handler(u32 __always_unused sect_type, void *section, u32 index,
+ u32 *offset)
+{
+ struct ice_label_section *labels;
+
+ if (!section)
+ return NULL;
+
+ /* cppcheck-suppress nullPointer */
+ if (index > ICE_MAX_LABELS_IN_BUF)
+ return NULL;
+
+ if (offset)
+ *offset = 0;
+
+ labels = section;
+ if (index >= le16_to_cpu(labels->count))
+ return NULL;
+
+ return labels->label + index;
+}
+
+/**
+ * ice_enum_labels
+ * @ice_seg: pointer to the ice segment (NULL on subsequent calls)
+ * @type: the section type that will contain the label (0 on subsequent calls)
+ * @state: ice_pkg_enum structure that will hold the state of the enumeration
+ * @value: pointer to a value that will return the label's value if found
+ *
+ * Enumerates a list of labels in the package. The caller will call
+ * ice_enum_labels(ice_seg, type, ...) to start the enumeration, then call
+ * ice_enum_labels(NULL, 0, ...) to continue. When the function returns NULL,
+ * the end of the list has been reached.
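+ *
+ * For example (sketch only), to walk all labels of one section type:
+ *
+ *	name = ice_enum_labels(ice_seg, type, &state, &val);
+ *	while (name) {
+ *		... use name and val ...
+ *		name = ice_enum_labels(NULL, 0, &state, &val);
+ *	}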
+ */
+static char *
+ice_enum_labels(struct ice_seg *ice_seg, u32 type, struct ice_pkg_enum *state,
+ u16 *value)
+{
+ struct ice_label *label;
+
+ /* Check for valid label section on first call */
+ if (type && !(type >= ICE_SID_LBL_FIRST && type <= ICE_SID_LBL_LAST))
+ return NULL;
+
+ label = ice_pkg_enum_entry(ice_seg, state, type, NULL,
+ ice_label_enum_handler);
+ if (!label)
+ return NULL;
+
+ *value = le16_to_cpu(label->value);
+ return label->name;
+}
+
+/**
+ * ice_add_tunnel_hint
+ * @hw: pointer to the HW structure
+ * @label_name: label text
+ * @val: value of the tunnel port boost entry
+ */
+static void ice_add_tunnel_hint(struct ice_hw *hw, char *label_name, u16 val)
+{
+ if (hw->tnl.count < ICE_TUNNEL_MAX_ENTRIES) {
+ u16 i;
+
+ for (i = 0; tnls[i].type != TNL_LAST; i++) {
+ size_t len = strlen(tnls[i].label_prefix);
+
+ /* Look for a matching label prefix before continuing */
+ if (strncmp(label_name, tnls[i].label_prefix, len))
+ continue;
+
+ /* Make sure this label matches our PF. Note that the PF
+ * character ('0' - '7') will be located where our
+ * prefix string's null terminator is located.
+ */
+ if ((label_name[len] - '0') == hw->pf_id) {
+ hw->tnl.tbl[hw->tnl.count].type = tnls[i].type;
+ hw->tnl.tbl[hw->tnl.count].valid = false;
+ hw->tnl.tbl[hw->tnl.count].boost_addr = val;
+ hw->tnl.tbl[hw->tnl.count].port = 0;
+ hw->tnl.count++;
+ break;
+ }
+ }
+ }
+}
+
+/**
+ * ice_add_dvm_hint
+ * @hw: pointer to the HW structure
+ * @val: value of the boost entry
+ * @enable: true if the entry needs to be enabled, or false if it needs to be disabled
+ */
+static void ice_add_dvm_hint(struct ice_hw *hw, u16 val, bool enable)
+{
+ if (hw->dvm_upd.count < ICE_DVM_MAX_ENTRIES) {
+ hw->dvm_upd.tbl[hw->dvm_upd.count].boost_addr = val;
+ hw->dvm_upd.tbl[hw->dvm_upd.count].enable = enable;
+ hw->dvm_upd.count++;
+ }
+}
+
+/**
+ * ice_init_pkg_hints
+ * @hw: pointer to the HW structure
+ * @ice_seg: pointer to the segment of the package to scan (non-NULL)
+ *
+ * This function will scan the package and save off relevant information
+ * (hints or metadata) for driver use. The ice_seg parameter must not be NULL
+ * since the first call to ice_enum_labels requires a pointer to an actual
+ * ice_seg structure.
+ */
+static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg)
+{
+ struct ice_pkg_enum state;
+ char *label_name;
+ u16 val;
+ int i;
+
+ memset(&hw->tnl, 0, sizeof(hw->tnl));
+ memset(&state, 0, sizeof(state));
+
+ if (!ice_seg)
+ return;
+
+ label_name = ice_enum_labels(ice_seg, ICE_SID_LBL_RXPARSER_TMEM, &state,
+ &val);
+
+ while (label_name) {
+ if (!strncmp(label_name, ICE_TNL_PRE, strlen(ICE_TNL_PRE)))
+ /* check for a tunnel entry */
+ ice_add_tunnel_hint(hw, label_name, val);
+
+ /* check for a dvm mode entry */
+ else if (!strncmp(label_name, ICE_DVM_PRE, strlen(ICE_DVM_PRE)))
+ ice_add_dvm_hint(hw, val, true);
+
+ /* check for a svm mode entry */
+ else if (!strncmp(label_name, ICE_SVM_PRE, strlen(ICE_SVM_PRE)))
+ ice_add_dvm_hint(hw, val, false);
+
+ label_name = ice_enum_labels(NULL, 0, &state, &val);
+ }
+
+ /* Cache the appropriate boost TCAM entry pointers for tunnels */
+ for (i = 0; i < hw->tnl.count; i++) {
+ ice_find_boost_entry(ice_seg, hw->tnl.tbl[i].boost_addr,
+ &hw->tnl.tbl[i].boost_entry);
+ if (hw->tnl.tbl[i].boost_entry) {
+ hw->tnl.tbl[i].valid = true;
+ if (hw->tnl.tbl[i].type < __TNL_TYPE_CNT)
+ hw->tnl.valid_count[hw->tnl.tbl[i].type]++;
+ }
+ }
+
+ /* Cache the appropriate boost TCAM entry pointers for DVM and SVM */
+ for (i = 0; i < hw->dvm_upd.count; i++)
+ ice_find_boost_entry(ice_seg, hw->dvm_upd.tbl[i].boost_addr,
+ &hw->dvm_upd.tbl[i].boost_entry);
+}
+
+/* Key creation */
+
+#define ICE_DC_KEY 0x1 /* don't care */
+#define ICE_DC_KEYINV 0x1
+#define ICE_NM_KEY 0x0 /* never match */
+#define ICE_NM_KEYINV 0x0
+#define ICE_0_KEY 0x1 /* match 0 */
+#define ICE_0_KEYINV 0x0
+#define ICE_1_KEY 0x0 /* match 1 */
+#define ICE_1_KEYINV 0x1
+
+/**
+ * ice_gen_key_word - generate 16-bits of a key/mask word
+ * @val: the value
+ * @valid: valid bits mask (change only the valid bits)
+ * @dont_care: don't care mask
+ * @nvr_mtch: never match mask
+ * @key: pointer to the array where the resulting key portion will be stored
+ * @key_inv: pointer to the array where the resulting key invert portion will be stored
+ *
+ * This function generates 16 bits from an 8-bit value, an 8-bit don't care
+ * mask and an 8-bit never match mask. The 16 bits of output are divided into
+ * 8 bits of key and 8 bits of key invert.
+ *
+ * '0' = b01, always match a 0 bit
+ * '1' = b10, always match a 1 bit
+ * '?' = b11, don't care bit (always matches)
+ * '~' = b00, never match bit
+ *
+ * Input:
+ *          val:         b0  1  0  1  0  1
+ *          dont_care:   b0  0  1  1  0  0
+ *          never_mtch:  b0  0  0  0  1  1
+ *          ------------------------------
+ * Result:  key:        b01 10 11 11 00 00
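+ *
+ * Splitting the pairs above (per the defines above, each two-bit pair is
+ * written as the key invert bit followed by the key bit):
+ *          key:         b 1  0  1  1  0  0
+ *          key_inv:     b 0  1  1  1  0  0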
+ */
+static int
+ice_gen_key_word(u8 val, u8 valid, u8 dont_care, u8 nvr_mtch, u8 *key,
+ u8 *key_inv)
+{
+ u8 in_key = *key, in_key_inv = *key_inv;
+ u8 i;
+
+ /* 'dont_care' and 'nvr_mtch' masks cannot overlap */
+ if ((dont_care ^ nvr_mtch) != (dont_care | nvr_mtch))
+ return -EIO;
+
+ *key = 0;
+ *key_inv = 0;
+
+ /* encode the 8 bits into 8-bit key and 8-bit key invert */
+ for (i = 0; i < 8; i++) {
+ *key >>= 1;
+ *key_inv >>= 1;
+
+ if (!(valid & 0x1)) { /* change only valid bits */
+ *key |= (in_key & 0x1) << 7;
+ *key_inv |= (in_key_inv & 0x1) << 7;
+ } else if (dont_care & 0x1) { /* don't care bit */
+ *key |= ICE_DC_KEY << 7;
+ *key_inv |= ICE_DC_KEYINV << 7;
+ } else if (nvr_mtch & 0x1) { /* never match bit */
+ *key |= ICE_NM_KEY << 7;
+ *key_inv |= ICE_NM_KEYINV << 7;
+ } else if (val & 0x01) { /* exact 1 match */
+ *key |= ICE_1_KEY << 7;
+ *key_inv |= ICE_1_KEYINV << 7;
+ } else { /* exact 0 match */
+ *key |= ICE_0_KEY << 7;
+ *key_inv |= ICE_0_KEYINV << 7;
+ }
+
+ dont_care >>= 1;
+ nvr_mtch >>= 1;
+ valid >>= 1;
+ val >>= 1;
+ in_key >>= 1;
+ in_key_inv >>= 1;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_bits_max_set - determine if the number of bits set is within a maximum
+ * @mask: pointer to the byte array which is the mask
+ * @size: the number of bytes in the mask
+ * @max: the max number of set bits
+ *
+ * This function determines if there are at most 'max' bits set in an array.
+ * Returns true if the number of bits set is <= max, or false otherwise.
+ */
+static bool ice_bits_max_set(const u8 *mask, u16 size, u16 max)
+{
+ u16 count = 0;
+ u16 i;
+
+ /* check each byte */
+ for (i = 0; i < size; i++) {
+ /* if 0, go to next byte */
+ if (!mask[i])
+ continue;
+
+ /* We know there is at least one set bit in this byte because of
+ * the above check; if we already have found 'max' number of
+ * bits set, then we can return failure now.
+ */
+ if (count == max)
+ return false;
+
+ /* count the bits in this byte, checking threshold */
+ count += hweight8(mask[i]);
+ if (count > max)
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * ice_set_key - generate a variable sized key with multiples of 16-bits
+ * @key: pointer to where the key will be stored
+ * @size: the size of the complete key in bytes (must be even)
+ * @val: array of 8-bit values that makes up the value portion of the key
+ * @upd: array of 8-bit masks that determine what key portion to update
+ * @dc: array of 8-bit masks that make up the don't care mask
+ * @nm: array of 8-bit masks that make up the never match mask
+ * @off: the offset of the first byte in the key to update
+ * @len: the number of bytes in the key update
+ *
+ * This function generates a key from a value, a don't care mask and a never
+ * match mask.
+ * upd, dc, and nm are optional parameters, and can be NULL:
+ * upd == NULL --> upd mask is all 1's (update all bits)
+ * dc == NULL --> dc mask is all 0's (no don't care bits)
+ * nm == NULL --> nm mask is all 0's (no never match bits)
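+ *
+ * Worked example of the layout (derived from the code below): for a key of
+ * size = 4 bytes, half_size = 2, so bytes [0..1] receive the generated key
+ * and bytes [2..3] receive the generated key invert; a call with off = 0 and
+ * len = 2 therefore updates key[0..1] and key[2..3] in lockstep.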
+ */
+static int
+ice_set_key(u8 *key, u16 size, u8 *val, u8 *upd, u8 *dc, u8 *nm, u16 off,
+ u16 len)
+{
+ u16 half_size;
+ u16 i;
+
+ /* size must be a multiple of 2 bytes. */
+ if (size % 2)
+ return -EIO;
+
+ half_size = size / 2;
+ if (off + len > half_size)
+ return -EIO;
+
+ /* Make sure at most one bit is set in the never match mask. Having
+ * more than one never match mask bit set will cause HW to consume
+ * excessive power; this is a power management efficiency check.
+ */
+#define ICE_NVR_MTCH_BITS_MAX 1
+ if (nm && !ice_bits_max_set(nm, len, ICE_NVR_MTCH_BITS_MAX))
+ return -EIO;
+
+ for (i = 0; i < len; i++)
+ if (ice_gen_key_word(val[i], upd ? upd[i] : 0xff,
+ dc ? dc[i] : 0, nm ? nm[i] : 0,
+ key + off + i, key + half_size + off + i))
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * ice_acquire_global_cfg_lock
+ * @hw: pointer to the HW structure
+ * @access: access type (read or write)
+ *
+ * This function will request ownership of the global config lock for reading
+ * or writing of the package. When attempting to obtain write access, the
+ * caller must check for the following two return values:
+ *
+ * 0 - Means the caller has acquired the global config lock
+ * and can perform writing of the package.
+ * -EALREADY - Indicates another driver has already written the
+ * package or has found that no update was necessary; in
+ * this case, the caller can just skip performing any
+ * update of the package.
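+ *
+ * A typical write-access caller handles both cases (sketch only):
+ *
+ *	status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE);
+ *	if (!status) {
+ *		... write the package, then ...
+ *		ice_release_global_cfg_lock(hw);
+ *	} else if (status == -EALREADY) {
+ *		... skip the update; another PF already loaded the package ...
+ *	}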
+ */
+static int
+ice_acquire_global_cfg_lock(struct ice_hw *hw,
+ enum ice_aq_res_access_type access)
+{
+ int status;
+
+ status = ice_acquire_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID, access,
+ ICE_GLOBAL_CFG_LOCK_TIMEOUT);
+
+ if (!status)
+ mutex_lock(&ice_global_cfg_lock_sw);
+ else if (status == -EALREADY)
+ ice_debug(hw, ICE_DBG_PKG, "Global config lock: No work to do\n");
+
+ return status;
+}
+
+/**
+ * ice_release_global_cfg_lock
+ * @hw: pointer to the HW structure
+ *
+ * This function will release the global config lock.
+ */
+static void ice_release_global_cfg_lock(struct ice_hw *hw)
+{
+ mutex_unlock(&ice_global_cfg_lock_sw);
+ ice_release_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID);
+}
+
+/**
+ * ice_acquire_change_lock
+ * @hw: pointer to the HW structure
+ * @access: access type (read or write)
+ *
+ * This function will request ownership of the change lock.
+ */
+int
+ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access)
+{
+ return ice_acquire_res(hw, ICE_CHANGE_LOCK_RES_ID, access,
+ ICE_CHANGE_LOCK_TIMEOUT);
+}
+
+/**
+ * ice_release_change_lock
+ * @hw: pointer to the HW structure
+ *
+ * This function will release the change lock using the proper Admin Command.
+ */
+void ice_release_change_lock(struct ice_hw *hw)
+{
+ ice_release_res(hw, ICE_CHANGE_LOCK_RES_ID);
+}
+
+/**
+ * ice_aq_download_pkg
+ * @hw: pointer to the hardware structure
+ * @pkg_buf: the package buffer to transfer
+ * @buf_size: the size of the package buffer
+ * @last_buf: last buffer indicator
+ * @error_offset: returns error offset
+ * @error_info: returns error information
+ * @cd: pointer to command details structure or NULL
+ *
+ * Download Package (0x0C40)
+ */
+static int
+ice_aq_download_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+ u16 buf_size, bool last_buf, u32 *error_offset,
+ u32 *error_info, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_download_pkg *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ if (error_offset)
+ *error_offset = 0;
+ if (error_info)
+ *error_info = 0;
+
+ cmd = &desc.params.download_pkg;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_download_pkg);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ if (last_buf)
+ cmd->flags |= ICE_AQC_DOWNLOAD_PKG_LAST_BUF;
+
+ status = ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd);
+ if (status == -EIO) {
+ /* Read error from buffer only when the FW returned an error */
+ struct ice_aqc_download_pkg_resp *resp;
+
+ resp = (struct ice_aqc_download_pkg_resp *)pkg_buf;
+ if (error_offset)
+ *error_offset = le32_to_cpu(resp->error_offset);
+ if (error_info)
+ *error_info = le32_to_cpu(resp->error_info);
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_upload_section
+ * @hw: pointer to the hardware structure
+ * @pkg_buf: the package buffer which will receive the section
+ * @buf_size: the size of the package buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Upload Section (0x0C41)
+ */
+int
+ice_aq_upload_section(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+ u16 buf_size, struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_upload_section);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ return ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd);
+}
+
+/**
+ * ice_aq_update_pkg
+ * @hw: pointer to the hardware structure
+ * @pkg_buf: the package cmd buffer
+ * @buf_size: the size of the package cmd buffer
+ * @last_buf: last buffer indicator
+ * @error_offset: returns error offset
+ * @error_info: returns error information
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update Package (0x0C42)
+ */
+static int
+ice_aq_update_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, u16 buf_size,
+ bool last_buf, u32 *error_offset, u32 *error_info,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_download_pkg *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ if (error_offset)
+ *error_offset = 0;
+ if (error_info)
+ *error_info = 0;
+
+ cmd = &desc.params.download_pkg;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_pkg);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ if (last_buf)
+ cmd->flags |= ICE_AQC_DOWNLOAD_PKG_LAST_BUF;
+
+ status = ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd);
+ if (status == -EIO) {
+ /* Read error from buffer only when the FW returned an error */
+ struct ice_aqc_download_pkg_resp *resp;
+
+ resp = (struct ice_aqc_download_pkg_resp *)pkg_buf;
+ if (error_offset)
+ *error_offset = le32_to_cpu(resp->error_offset);
+ if (error_info)
+ *error_info = le32_to_cpu(resp->error_info);
+ }
+
+ return status;
+}
+
+/**
+ * ice_find_seg_in_pkg
+ * @hw: pointer to the hardware structure
+ * @seg_type: the segment type to search for (e.g., SEGMENT_TYPE_CPK)
+ * @pkg_hdr: pointer to the package header to be searched
+ *
+ * This function searches a package file for a particular segment type. On
+ * success it returns a pointer to the segment header, otherwise it will
+ * return NULL.
+ */
+static struct ice_generic_seg_hdr *
+ice_find_seg_in_pkg(struct ice_hw *hw, u32 seg_type,
+ struct ice_pkg_hdr *pkg_hdr)
+{
+ u32 i;
+
+ ice_debug(hw, ICE_DBG_PKG, "Package format version: %d.%d.%d.%d\n",
+ pkg_hdr->pkg_format_ver.major, pkg_hdr->pkg_format_ver.minor,
+ pkg_hdr->pkg_format_ver.update,
+ pkg_hdr->pkg_format_ver.draft);
+
+ /* Search all package segments for the requested segment type */
+ for (i = 0; i < le32_to_cpu(pkg_hdr->seg_count); i++) {
+ struct ice_generic_seg_hdr *seg;
+
+ seg = (struct ice_generic_seg_hdr *)
+ ((u8 *)pkg_hdr + le32_to_cpu(pkg_hdr->seg_offset[i]));
+
+ if (le32_to_cpu(seg->seg_type) == seg_type)
+ return seg;
+ }
+
+ return NULL;
+}
+
+/**
+ * ice_update_pkg_no_lock
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ */
+static int
+ice_update_pkg_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+ int status = 0;
+ u32 i;
+
+ for (i = 0; i < count; i++) {
+ struct ice_buf_hdr *bh = (struct ice_buf_hdr *)(bufs + i);
+ bool last = ((i + 1) == count);
+ u32 offset, info;
+
+ status = ice_aq_update_pkg(hw, bh, le16_to_cpu(bh->data_end),
+ last, &offset, &info, NULL);
+
+ if (status) {
+ ice_debug(hw, ICE_DBG_PKG, "Update pkg failed: err %d off %d inf %d\n",
+ status, offset, info);
+ break;
+ }
+ }
+
+ return status;
+}
+
+/**
+ * ice_update_pkg
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains change lock and updates package.
+ */
+static int ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+ int status;
+
+ status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
+ if (status)
+ return status;
+
+ status = ice_update_pkg_no_lock(hw, bufs, count);
+
+ ice_release_change_lock(hw);
+
+ return status;
+}
+
+static enum ice_ddp_state ice_map_aq_err_to_ddp_state(enum ice_aq_err aq_err)
+{
+ switch (aq_err) {
+ case ICE_AQ_RC_ENOSEC:
+ case ICE_AQ_RC_EBADSIG:
+ return ICE_DDP_PKG_FILE_SIGNATURE_INVALID;
+ case ICE_AQ_RC_ESVN:
+ return ICE_DDP_PKG_FILE_REVISION_TOO_LOW;
+ case ICE_AQ_RC_EBADMAN:
+ case ICE_AQ_RC_EBADBUF:
+ return ICE_DDP_PKG_LOAD_ERROR;
+ default:
+ return ICE_DDP_PKG_ERR;
+ }
+}
+
+/**
+ * ice_dwnld_cfg_bufs
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains global config lock and downloads the package configuration buffers
+ * to the firmware. Metadata buffers are skipped, and the first metadata buffer
+ * found indicates that the rest of the buffers are all metadata buffers.
+ */
+static enum ice_ddp_state
+ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+ enum ice_ddp_state state = ICE_DDP_PKG_SUCCESS;
+ struct ice_buf_hdr *bh;
+ enum ice_aq_err err;
+ u32 offset, info, i;
+ int status;
+
+ if (!bufs || !count)
+ return ICE_DDP_PKG_ERR;
+
+ /* If the first buffer's first section has its metadata bit set
+ * then there are no buffers to be downloaded, and the operation is
+ * considered a success.
+ */
+ bh = (struct ice_buf_hdr *)bufs;
+ if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF)
+ return ICE_DDP_PKG_SUCCESS;
+
+ status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE);
+ if (status) {
+ if (status == -EALREADY)
+ return ICE_DDP_PKG_ALREADY_LOADED;
+ return ice_map_aq_err_to_ddp_state(hw->adminq.sq_last_status);
+ }
+
+ for (i = 0; i < count; i++) {
+ bool last = ((i + 1) == count);
+
+ if (!last) {
+ /* check next buffer for metadata flag */
+ bh = (struct ice_buf_hdr *)(bufs + i + 1);
+
+ /* A set metadata flag in the next buffer will signal
+ * that the current buffer will be the last buffer
+ * downloaded
+ */
+ if (le16_to_cpu(bh->section_count))
+ if (le32_to_cpu(bh->section_entry[0].type) &
+ ICE_METADATA_BUF)
+ last = true;
+ }
+
+ bh = (struct ice_buf_hdr *)(bufs + i);
+
+ status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, last,
+ &offset, &info, NULL);
+
+ /* Save AQ status from download package */
+ if (status) {
+ ice_debug(hw, ICE_DBG_PKG, "Pkg download failed: err %d off %d inf %d\n",
+ status, offset, info);
+ err = hw->adminq.sq_last_status;
+ state = ice_map_aq_err_to_ddp_state(err);
+ break;
+ }
+
+ if (last)
+ break;
+ }
+
+ if (!status) {
+ status = ice_set_vlan_mode(hw);
+ if (status)
+ ice_debug(hw, ICE_DBG_PKG, "Failed to set VLAN mode: err %d\n",
+ status);
+ }
+
+ ice_release_global_cfg_lock(hw);
+
+ return state;
+}
+
+/**
+ * ice_aq_get_pkg_info_list
+ * @hw: pointer to the hardware structure
+ * @pkg_info: the buffer which will receive the information list
+ * @buf_size: the size of the pkg_info information buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get Package Info List (0x0C43)
+ */
+static int
+ice_aq_get_pkg_info_list(struct ice_hw *hw,
+ struct ice_aqc_get_pkg_info_resp *pkg_info,
+ u16 buf_size, struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_pkg_info_list);
+
+ return ice_aq_send_cmd(hw, &desc, pkg_info, buf_size, cd);
+}
+
+/**
+ * ice_download_pkg
+ * @hw: pointer to the hardware structure
+ * @ice_seg: pointer to the segment of the package to be downloaded
+ *
+ * Handles the download of a complete package.
+ */
+static enum ice_ddp_state
+ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg)
+{
+ struct ice_buf_table *ice_buf_tbl;
+ int status;
+
+ ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n",
+ ice_seg->hdr.seg_format_ver.major,
+ ice_seg->hdr.seg_format_ver.minor,
+ ice_seg->hdr.seg_format_ver.update,
+ ice_seg->hdr.seg_format_ver.draft);
+
+ ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n",
+ le32_to_cpu(ice_seg->hdr.seg_type),
+ le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_id);
+
+ ice_buf_tbl = ice_find_buf_table(ice_seg);
+
+ ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n",
+ le32_to_cpu(ice_buf_tbl->buf_count));
+
+ status = ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array,
+ le32_to_cpu(ice_buf_tbl->buf_count));
+
+ ice_post_pkg_dwnld_vlan_mode_cfg(hw);
+
+ return status;
+}
+
+/**
+ * ice_init_pkg_info
+ * @hw: pointer to the hardware structure
+ * @pkg_hdr: pointer to the driver's package hdr
+ *
+ * Saves off the package details into the HW structure.
+ */
+static enum ice_ddp_state
+ice_init_pkg_info(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
+{
+ struct ice_generic_seg_hdr *seg_hdr;
+
+ if (!pkg_hdr)
+ return ICE_DDP_PKG_ERR;
+
+ seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr);
+ if (seg_hdr) {
+ struct ice_meta_sect *meta;
+ struct ice_pkg_enum state;
+
+ memset(&state, 0, sizeof(state));
+
+ /* Get package information from the Metadata Section */
+ meta = ice_pkg_enum_section((struct ice_seg *)seg_hdr, &state,
+ ICE_SID_METADATA);
+ if (!meta) {
+ ice_debug(hw, ICE_DBG_INIT, "Did not find ice metadata section in package\n");
+ return ICE_DDP_PKG_INVALID_FILE;
+ }
+
+ hw->pkg_ver = meta->ver;
+ memcpy(hw->pkg_name, meta->name, sizeof(meta->name));
+
+ ice_debug(hw, ICE_DBG_PKG, "Pkg: %d.%d.%d.%d, %s\n",
+ meta->ver.major, meta->ver.minor, meta->ver.update,
+ meta->ver.draft, meta->name);
+
+ hw->ice_seg_fmt_ver = seg_hdr->seg_format_ver;
+ memcpy(hw->ice_seg_id, seg_hdr->seg_id,
+ sizeof(hw->ice_seg_id));
+
+ ice_debug(hw, ICE_DBG_PKG, "Ice Seg: %d.%d.%d.%d, %s\n",
+ seg_hdr->seg_format_ver.major,
+ seg_hdr->seg_format_ver.minor,
+ seg_hdr->seg_format_ver.update,
+ seg_hdr->seg_format_ver.draft,
+ seg_hdr->seg_id);
+ } else {
+ ice_debug(hw, ICE_DBG_INIT, "Did not find ice segment in driver package\n");
+ return ICE_DDP_PKG_INVALID_FILE;
+ }
+
+ return ICE_DDP_PKG_SUCCESS;
+}
+
+/**
+ * ice_get_pkg_info
+ * @hw: pointer to the hardware structure
+ *
+ * Store details of the package currently loaded in HW into the HW structure.
+ */
+static enum ice_ddp_state ice_get_pkg_info(struct ice_hw *hw)
+{
+ enum ice_ddp_state state = ICE_DDP_PKG_SUCCESS;
+ struct ice_aqc_get_pkg_info_resp *pkg_info;
+ u16 size;
+ u32 i;
+
+ size = struct_size(pkg_info, pkg_info, ICE_PKG_CNT);
+ pkg_info = kzalloc(size, GFP_KERNEL);
+ if (!pkg_info)
+ return ICE_DDP_PKG_ERR;
+
+ if (ice_aq_get_pkg_info_list(hw, pkg_info, size, NULL)) {
+ state = ICE_DDP_PKG_ERR;
+ goto init_pkg_free_alloc;
+ }
+
+ for (i = 0; i < le32_to_cpu(pkg_info->count); i++) {
+#define ICE_PKG_FLAG_COUNT 4
+ char flags[ICE_PKG_FLAG_COUNT + 1] = { 0 };
+ u8 place = 0;
+
+ if (pkg_info->pkg_info[i].is_active) {
+ flags[place++] = 'A';
+ hw->active_pkg_ver = pkg_info->pkg_info[i].ver;
+ hw->active_track_id =
+ le32_to_cpu(pkg_info->pkg_info[i].track_id);
+ memcpy(hw->active_pkg_name,
+ pkg_info->pkg_info[i].name,
+ sizeof(pkg_info->pkg_info[i].name));
+ hw->active_pkg_in_nvm = pkg_info->pkg_info[i].is_in_nvm;
+ }
+ if (pkg_info->pkg_info[i].is_active_at_boot)
+ flags[place++] = 'B';
+ if (pkg_info->pkg_info[i].is_modified)
+ flags[place++] = 'M';
+ if (pkg_info->pkg_info[i].is_in_nvm)
+ flags[place++] = 'N';
+
+ ice_debug(hw, ICE_DBG_PKG, "Pkg[%d]: %d.%d.%d.%d,%s,%s\n",
+ i, pkg_info->pkg_info[i].ver.major,
+ pkg_info->pkg_info[i].ver.minor,
+ pkg_info->pkg_info[i].ver.update,
+ pkg_info->pkg_info[i].ver.draft,
+ pkg_info->pkg_info[i].name, flags);
+ }
+
+init_pkg_free_alloc:
+ kfree(pkg_info);
+
+ return state;
+}
+
+/**
+ * ice_verify_pkg - verify package
+ * @pkg: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * Verifies various attributes of the package file, including length, format
+ * version, and the requirement of at least one segment.
+ */
+static enum ice_ddp_state ice_verify_pkg(struct ice_pkg_hdr *pkg, u32 len)
+{
+ u32 seg_count;
+ u32 i;
+
+ if (len < struct_size(pkg, seg_offset, 1))
+ return ICE_DDP_PKG_INVALID_FILE;
+
+ if (pkg->pkg_format_ver.major != ICE_PKG_FMT_VER_MAJ ||
+ pkg->pkg_format_ver.minor != ICE_PKG_FMT_VER_MNR ||
+ pkg->pkg_format_ver.update != ICE_PKG_FMT_VER_UPD ||
+ pkg->pkg_format_ver.draft != ICE_PKG_FMT_VER_DFT)
+ return ICE_DDP_PKG_INVALID_FILE;
+
+ /* pkg must have at least one segment */
+ seg_count = le32_to_cpu(pkg->seg_count);
+ if (seg_count < 1)
+ return ICE_DDP_PKG_INVALID_FILE;
+
+ /* make sure segment array fits in package length */
+ if (len < struct_size(pkg, seg_offset, seg_count))
+ return ICE_DDP_PKG_INVALID_FILE;
+
+ /* all segments must fit within length */
+ for (i = 0; i < seg_count; i++) {
+ u32 off = le32_to_cpu(pkg->seg_offset[i]);
+ struct ice_generic_seg_hdr *seg;
+
+ /* segment header must fit */
+ if (len < off + sizeof(*seg))
+ return ICE_DDP_PKG_INVALID_FILE;
+
+ seg = (struct ice_generic_seg_hdr *)((u8 *)pkg + off);
+
+ /* segment body must fit */
+ if (len < off + le32_to_cpu(seg->seg_size))
+ return ICE_DDP_PKG_INVALID_FILE;
+ }
+
+ return ICE_DDP_PKG_SUCCESS;
+}
+
+/**
+ * ice_free_seg - free package segment pointer
+ * @hw: pointer to the hardware structure
+ *
+ * Frees the package segment pointer in the proper manner, depending on
+ * whether the segment was allocated or just the passed-in pointer was stored.
+ */
+void ice_free_seg(struct ice_hw *hw)
+{
+ if (hw->pkg_copy) {
+ devm_kfree(ice_hw_to_dev(hw), hw->pkg_copy);
+ hw->pkg_copy = NULL;
+ hw->pkg_size = 0;
+ }
+ hw->seg = NULL;
+}
+
+/**
+ * ice_init_pkg_regs - initialize additional package registers
+ * @hw: pointer to the hardware structure
+ */
+static void ice_init_pkg_regs(struct ice_hw *hw)
+{
+#define ICE_SW_BLK_INP_MASK_L 0xFFFFFFFF
+#define ICE_SW_BLK_INP_MASK_H 0x0000FFFF
+#define ICE_SW_BLK_IDX 0
+
+ /* setup Switch block input mask, which is 48-bits in two parts */
+ wr32(hw, GL_PREEXT_L2_PMASK0(ICE_SW_BLK_IDX), ICE_SW_BLK_INP_MASK_L);
+ wr32(hw, GL_PREEXT_L2_PMASK1(ICE_SW_BLK_IDX), ICE_SW_BLK_INP_MASK_H);
+}
+
+/**
+ * ice_chk_pkg_version - check package version for compatibility with driver
+ * @pkg_ver: pointer to a version structure to check
+ *
+ * Check to make sure that the package about to be downloaded is compatible with
+ * the driver. To be compatible, the major and minor components of the package
+ * version must match our ICE_PKG_SUPP_VER_MAJ and ICE_PKG_SUPP_VER_MNR
+ * definitions.
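+ *
+ * For example, if the supported version were 1.3, a 1.4 package would return
+ * ICE_DDP_PKG_FILE_VERSION_TOO_HIGH, a 1.2 package would return
+ * ICE_DDP_PKG_FILE_VERSION_TOO_LOW, and any 1.3.x.x package would return
+ * ICE_DDP_PKG_SUCCESS.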
+ */
+static enum ice_ddp_state ice_chk_pkg_version(struct ice_pkg_ver *pkg_ver)
+{
+ if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ ||
+ (pkg_ver->major == ICE_PKG_SUPP_VER_MAJ &&
+ pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
+ return ICE_DDP_PKG_FILE_VERSION_TOO_HIGH;
+ else if (pkg_ver->major < ICE_PKG_SUPP_VER_MAJ ||
+ (pkg_ver->major == ICE_PKG_SUPP_VER_MAJ &&
+ pkg_ver->minor < ICE_PKG_SUPP_VER_MNR))
+ return ICE_DDP_PKG_FILE_VERSION_TOO_LOW;
+
+ return ICE_DDP_PKG_SUCCESS;
+}
+
+/**
+ * ice_chk_pkg_compat
+ * @hw: pointer to the hardware structure
+ * @ospkg: pointer to the package hdr
+ * @seg: pointer to the package segment hdr
+ *
+ * This function checks the package version compatibility with driver and NVM
+ */
+static enum ice_ddp_state
+ice_chk_pkg_compat(struct ice_hw *hw, struct ice_pkg_hdr *ospkg,
+ struct ice_seg **seg)
+{
+ struct ice_aqc_get_pkg_info_resp *pkg;
+ enum ice_ddp_state state;
+ u16 size;
+ u32 i;
+
+ /* Check package version compatibility */
+ state = ice_chk_pkg_version(&hw->pkg_ver);
+ if (state) {
+ ice_debug(hw, ICE_DBG_INIT, "Package version check failed.\n");
+ return state;
+ }
+
+ /* find ICE segment in given package */
+ *seg = (struct ice_seg *)ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE,
+ ospkg);
+ if (!*seg) {
+ ice_debug(hw, ICE_DBG_INIT, "no ice segment in package.\n");
+ return ICE_DDP_PKG_INVALID_FILE;
+ }
+
+ /* Check if FW is compatible with the OS package */
+ size = struct_size(pkg, pkg_info, ICE_PKG_CNT);
+ pkg = kzalloc(size, GFP_KERNEL);
+ if (!pkg)
+ return ICE_DDP_PKG_ERR;
+
+ if (ice_aq_get_pkg_info_list(hw, pkg, size, NULL)) {
+ state = ICE_DDP_PKG_LOAD_ERROR;
+ goto fw_ddp_compat_free_alloc;
+ }
+
+ for (i = 0; i < le32_to_cpu(pkg->count); i++) {
+ /* loop till we find the NVM package */
+ if (!pkg->pkg_info[i].is_in_nvm)
+ continue;
+ if ((*seg)->hdr.seg_format_ver.major !=
+ pkg->pkg_info[i].ver.major ||
+ (*seg)->hdr.seg_format_ver.minor >
+ pkg->pkg_info[i].ver.minor) {
+ state = ICE_DDP_PKG_FW_MISMATCH;
+ ice_debug(hw, ICE_DBG_INIT, "OS package is not compatible with NVM.\n");
+ }
+ /* done processing NVM package so break */
+ break;
+ }
+fw_ddp_compat_free_alloc:
+ kfree(pkg);
+ return state;
+}
+
+/**
+ * ice_sw_fv_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the field vector entry to be returned
+ * @offset: ptr to variable that receives the offset in the field vector table
+ *
+ * This is a callback function that can be passed to ice_pkg_enum_entry.
+ * This function treats the given section as of type ice_sw_fv_section and
+ * enumerates offset field. "offset" is an index into the field vector table.
+ */
+static void *
+ice_sw_fv_handler(u32 sect_type, void *section, u32 index, u32 *offset)
+{
+ struct ice_sw_fv_section *fv_section = section;
+
+ if (!section || sect_type != ICE_SID_FLD_VEC_SW)
+ return NULL;
+ if (index >= le16_to_cpu(fv_section->count))
+ return NULL;
+ if (offset)
+ /* "index" passed in to this function is relative to a given
+ * 4k block. To get to the true index into the field vector
+ * table need to add the relative index to the base_offset
+ * field of this section
+ */
+ *offset = le16_to_cpu(fv_section->base_offset) + index;
+ return fv_section->fv + index;
+}
+
+/**
+ * ice_get_prof_index_max - get the max profile index of the used profiles
+ * @hw: pointer to the HW struct
+ *
+ * Calling this function will get the max profile index of the used profiles
+ * and store the index number in struct ice_switch_info *switch_info
+ * in HW for later use.
+ */
+static int ice_get_prof_index_max(struct ice_hw *hw)
+{
+ u16 prof_index = 0, j, max_prof_index = 0;
+ struct ice_pkg_enum state;
+ struct ice_seg *ice_seg;
+ bool flag = false;
+ struct ice_fv *fv;
+ u32 offset;
+
+ memset(&state, 0, sizeof(state));
+
+ if (!hw->seg)
+ return -EINVAL;
+
+ ice_seg = hw->seg;
+
+ do {
+ fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW,
+ &offset, ice_sw_fv_handler);
+ if (!fv)
+ break;
+ ice_seg = NULL;
+
+ /* In the profiles that are not used, the prot_id is set to 0xff
+ * and the off is set to 0x1ff for all the field vectors.
+ */
+ for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++)
+ if (fv->ew[j].prot_id != ICE_PROT_INVALID ||
+ fv->ew[j].off != ICE_FV_OFFSET_INVAL)
+ flag = true;
+ if (flag && prof_index > max_prof_index)
+ max_prof_index = prof_index;
+
+ prof_index++;
+ flag = false;
+ } while (fv);
+
+ hw->switch_info->max_used_prof_index = max_prof_index;
+
+ return 0;
+}
+
+/**
+ * ice_get_ddp_pkg_state - get DDP pkg state after download
+ * @hw: pointer to the HW struct
+ * @already_loaded: indicates if pkg was already loaded onto the device
+ */
+static enum ice_ddp_state
+ice_get_ddp_pkg_state(struct ice_hw *hw, bool already_loaded)
+{
+ if (hw->pkg_ver.major == hw->active_pkg_ver.major &&
+ hw->pkg_ver.minor == hw->active_pkg_ver.minor &&
+ hw->pkg_ver.update == hw->active_pkg_ver.update &&
+ hw->pkg_ver.draft == hw->active_pkg_ver.draft &&
+ !memcmp(hw->pkg_name, hw->active_pkg_name, sizeof(hw->pkg_name))) {
+ if (already_loaded)
+ return ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED;
+ else
+ return ICE_DDP_PKG_SUCCESS;
+ } else if (hw->active_pkg_ver.major != ICE_PKG_SUPP_VER_MAJ ||
+ hw->active_pkg_ver.minor != ICE_PKG_SUPP_VER_MNR) {
+ return ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED;
+ } else if (hw->active_pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
+ hw->active_pkg_ver.minor == ICE_PKG_SUPP_VER_MNR) {
+ return ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED;
+ } else {
+ return ICE_DDP_PKG_ERR;
+ }
+}
+
+/**
+ * ice_init_pkg - initialize/download package
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * This function initializes a package. The package contains HW tables
+ * required to do packet processing. First, the function extracts package
+ * information such as version. Then it finds the ice configuration segment
+ * within the package; this function then saves a copy of the segment pointer
+ * within the supplied package buffer. Next, the function will cache any hints
+ * from the package, followed by downloading the package itself. Note that if
+ * a previous PF driver has already downloaded the package successfully, then
+ * the current driver will not have to download the package again.
+ *
+ * The local package contents will be used to query default behavior and to
+ * update specific sections of the HW's version of the package (e.g. to update
+ * the parse graph to understand new protocols).
+ *
+ * This function stores a pointer to the package buffer memory, and it is
+ * expected that the supplied buffer will not be freed immediately. If the
+ * package buffer needs to be freed, such as when read from a file, use
+ * ice_copy_and_init_pkg() instead of directly calling ice_init_pkg() in this
+ * case.
+ */
+enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len)
+{
+ bool already_loaded = false;
+ enum ice_ddp_state state;
+ struct ice_pkg_hdr *pkg;
+ struct ice_seg *seg;
+
+ if (!buf || !len)
+ return ICE_DDP_PKG_ERR;
+
+ pkg = (struct ice_pkg_hdr *)buf;
+ state = ice_verify_pkg(pkg, len);
+ if (state) {
+ ice_debug(hw, ICE_DBG_INIT, "failed to verify pkg (err: %d)\n",
+ state);
+ return state;
+ }
+
+ /* initialize package info */
+ state = ice_init_pkg_info(hw, pkg);
+ if (state)
+ return state;
+
+ /* before downloading the package, check package version for
+ * compatibility with driver
+ */
+ state = ice_chk_pkg_compat(hw, pkg, &seg);
+ if (state)
+ return state;
+
+ /* initialize package hints and then download package */
+ ice_init_pkg_hints(hw, seg);
+ state = ice_download_pkg(hw, seg);
+ if (state == ICE_DDP_PKG_ALREADY_LOADED) {
+ ice_debug(hw, ICE_DBG_INIT, "package previously loaded - no work.\n");
+ already_loaded = true;
+ }
+
+ /* Get information on the package currently loaded in HW, then make sure
+ * the driver is compatible with this version.
+ */
+ if (!state || state == ICE_DDP_PKG_ALREADY_LOADED) {
+ state = ice_get_pkg_info(hw);
+ if (!state)
+ state = ice_get_ddp_pkg_state(hw, already_loaded);
+ }
+
+ if (ice_is_init_pkg_successful(state)) {
+ hw->seg = seg;
+ /* on successful package download update other required
+ * registers to support the package and fill HW tables
+ * with package content.
+ */
+ ice_init_pkg_regs(hw);
+ ice_fill_blk_tbls(hw);
+ ice_fill_hw_ptype(hw);
+ ice_get_prof_index_max(hw);
+ } else {
+ ice_debug(hw, ICE_DBG_INIT, "package load failed, %d\n",
+ state);
+ }
+
+ return state;
+}
+
+/**
+ * ice_copy_and_init_pkg - initialize/download a copy of the package
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * This function copies the package buffer, and then calls ice_init_pkg() to
+ * initialize the copied package contents.
+ *
+ * The copying is necessary if the package buffer supplied is constant, or if
+ * the memory may disappear shortly after calling this function, such as when
+ * the buffer was read from a file. If the package buffer resides in the data
+ * segment and can be modified, the caller is free to use ice_init_pkg()
+ * directly instead.
+ *
+ * The caller is free to immediately destroy the original package buffer, as
+ * the new copy will be managed by this function and related routines.
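+ *
+ * Illustrative caller flow (sketch only; the device pointer, state variable
+ * and firmware file name here are assumed, not taken from this file):
+ *
+ *	const struct firmware *fw;
+ *
+ *	if (!request_firmware(&fw, "intel/ice/ddp/ice.pkg", dev)) {
+ *		state = ice_copy_and_init_pkg(hw, fw->data, fw->size);
+ *		release_firmware(fw);
+ *	}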
+ */
+enum ice_ddp_state
+ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len)
+{
+ enum ice_ddp_state state;
+ u8 *buf_copy;
+
+ if (!buf || !len)
+ return ICE_DDP_PKG_ERR;
+
+ buf_copy = devm_kmemdup(ice_hw_to_dev(hw), buf, len, GFP_KERNEL);
+
+ state = ice_init_pkg(hw, buf_copy, len);
+ if (!ice_is_init_pkg_successful(state)) {
+ /* Free the copy, since we failed to initialize the package */
+ devm_kfree(ice_hw_to_dev(hw), buf_copy);
+ } else {
+ /* Track the copied pkg so we can free it later */
+ hw->pkg_copy = buf_copy;
+ hw->pkg_size = len;
+ }
+
+ return state;
+}
+
+/**
+ * ice_is_init_pkg_successful - check if DDP init was successful
+ * @state: state of the DDP pkg after download
+ */
+bool ice_is_init_pkg_successful(enum ice_ddp_state state)
+{
+ switch (state) {
+ case ICE_DDP_PKG_SUCCESS:
+ case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
+ case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * ice_pkg_buf_alloc
+ * @hw: pointer to the HW structure
+ *
+ * Allocates a package buffer and returns a pointer to the buffer header.
+ * Note: all package contents must be in Little Endian form.
+ */
+static struct ice_buf_build *ice_pkg_buf_alloc(struct ice_hw *hw)
+{
+ struct ice_buf_build *bld;
+ struct ice_buf_hdr *buf;
+
+ bld = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*bld), GFP_KERNEL);
+ if (!bld)
+ return NULL;
+
+ buf = (struct ice_buf_hdr *)bld;
+ buf->data_end = cpu_to_le16(offsetof(struct ice_buf_hdr,
+ section_entry));
+ return bld;
+}
+
+static bool ice_is_gtp_u_profile(u16 prof_idx)
+{
+ return (prof_idx >= ICE_PROFID_IPV6_GTPU_TEID &&
+ prof_idx <= ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER) ||
+ prof_idx == ICE_PROFID_IPV4_GTPU_TEID;
+}
+
+static bool ice_is_gtp_c_profile(u16 prof_idx)
+{
+ switch (prof_idx) {
+ case ICE_PROFID_IPV4_GTPC_TEID:
+ case ICE_PROFID_IPV4_GTPC_NO_TEID:
+ case ICE_PROFID_IPV6_GTPC_TEID:
+ case ICE_PROFID_IPV6_GTPC_NO_TEID:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * ice_get_sw_prof_type - determine switch profile type
+ * @hw: pointer to the HW structure
+ * @fv: pointer to the switch field vector
+ * @prof_idx: profile index to check
+ */
+static enum ice_prof_type
+ice_get_sw_prof_type(struct ice_hw *hw, struct ice_fv *fv, u32 prof_idx)
+{
+ u16 i;
+
+ if (ice_is_gtp_c_profile(prof_idx))
+ return ICE_PROF_TUN_GTPC;
+
+ if (ice_is_gtp_u_profile(prof_idx))
+ return ICE_PROF_TUN_GTPU;
+
+ for (i = 0; i < hw->blk[ICE_BLK_SW].es.fvw; i++) {
+ /* UDP tunnel will have UDP_OF protocol ID and VNI offset */
+ if (fv->ew[i].prot_id == (u8)ICE_PROT_UDP_OF &&
+ fv->ew[i].off == ICE_VNI_OFFSET)
+ return ICE_PROF_TUN_UDP;
+
+ /* GRE tunnel will have GRE protocol */
+ if (fv->ew[i].prot_id == (u8)ICE_PROT_GRE_OF)
+ return ICE_PROF_TUN_GRE;
+ }
+
+ return ICE_PROF_NON_TUN;
+}
+
+/**
+ * ice_get_sw_fv_bitmap - Get switch field vector bitmap based on profile type
+ * @hw: pointer to hardware structure
+ * @req_profs: type of profiles requested
+ * @bm: pointer to memory for returning the bitmap of field vectors
+ */
+void
+ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type req_profs,
+ unsigned long *bm)
+{
+ struct ice_pkg_enum state;
+ struct ice_seg *ice_seg;
+ struct ice_fv *fv;
+
+ if (req_profs == ICE_PROF_ALL) {
+ bitmap_set(bm, 0, ICE_MAX_NUM_PROFILES);
+ return;
+ }
+
+ memset(&state, 0, sizeof(state));
+ bitmap_zero(bm, ICE_MAX_NUM_PROFILES);
+ ice_seg = hw->seg;
+ do {
+ enum ice_prof_type prof_type;
+ u32 offset;
+
+ fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW,
+ &offset, ice_sw_fv_handler);
+ ice_seg = NULL;
+
+ if (fv) {
+ /* Determine field vector type */
+ prof_type = ice_get_sw_prof_type(hw, fv, offset);
+
+ if (req_profs & prof_type)
+ set_bit((u16)offset, bm);
+ }
+ } while (fv);
+}
+
+/**
+ * ice_get_sw_fv_list
+ * @hw: pointer to the HW structure
+ * @lkups: list of protocol types
+ * @bm: bitmap of field vectors to consider
+ * @fv_list: Head of a list
+ *
+ * Finds all the field vector entries from switch block that contain
+ * a given protocol ID and offset and returns a list of structures of type
+ * "ice_sw_fv_list_entry". Every structure in the list has a field vector
+ * definition and profile ID information
+ * NOTE: The caller of the function is responsible for freeing the memory
+ * allocated for every list entry.
+ */
+int
+ice_get_sw_fv_list(struct ice_hw *hw, struct ice_prot_lkup_ext *lkups,
+ unsigned long *bm, struct list_head *fv_list)
+{
+ struct ice_sw_fv_list_entry *fvl;
+ struct ice_sw_fv_list_entry *tmp;
+ struct ice_pkg_enum state;
+ struct ice_seg *ice_seg;
+ struct ice_fv *fv;
+ u32 offset;
+
+ memset(&state, 0, sizeof(state));
+
+ if (!lkups->n_val_words || !hw->seg)
+ return -EINVAL;
+
+ ice_seg = hw->seg;
+ do {
+ u16 i;
+
+ fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW,
+ &offset, ice_sw_fv_handler);
+ if (!fv)
+ break;
+ ice_seg = NULL;
+
+ /* If field vector is not in the bitmap list, then skip this
+ * profile.
+ */
+ if (!test_bit((u16)offset, bm))
+ continue;
+
+ for (i = 0; i < lkups->n_val_words; i++) {
+ int j;
+
+ for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++)
+ if (fv->ew[j].prot_id ==
+ lkups->fv_words[i].prot_id &&
+ fv->ew[j].off == lkups->fv_words[i].off)
+ break;
+ if (j >= hw->blk[ICE_BLK_SW].es.fvw)
+ break;
+ if (i + 1 == lkups->n_val_words) {
+ fvl = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*fvl), GFP_KERNEL);
+ if (!fvl)
+ goto err;
+ fvl->fv_ptr = fv;
+ fvl->profile_id = offset;
+ list_add(&fvl->list_entry, fv_list);
+ break;
+ }
+ }
+ } while (fv);
+ if (list_empty(fv_list)) {
+ dev_warn(ice_hw_to_dev(hw), "Required profiles not found in currently loaded DDP package\n");
+ return -EIO;
+ }
+
+ return 0;
+
+err:
+ list_for_each_entry_safe(fvl, tmp, fv_list, list_entry) {
+ list_del(&fvl->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), fvl);
+ }
+
+ return -ENOMEM;
+}
+
+/**
+ * ice_init_prof_result_bm - Initialize the profile result index bitmap
+ * @hw: pointer to hardware structure
+ */
+void ice_init_prof_result_bm(struct ice_hw *hw)
+{
+ struct ice_pkg_enum state;
+ struct ice_seg *ice_seg;
+ struct ice_fv *fv;
+
+ memset(&state, 0, sizeof(state));
+
+ if (!hw->seg)
+ return;
+
+ ice_seg = hw->seg;
+ do {
+ u32 off;
+ u16 i;
+
+ fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW,
+ &off, ice_sw_fv_handler);
+ ice_seg = NULL;
+ if (!fv)
+ break;
+
+ bitmap_zero(hw->switch_info->prof_res_bm[off],
+ ICE_MAX_FV_WORDS);
+
+ /* Determine empty field vector indices, these can be
+ * used for recipe results. Skip index 0, since it is
+ * always used for Switch ID.
+ */
+ for (i = 1; i < ICE_MAX_FV_WORDS; i++)
+ if (fv->ew[i].prot_id == ICE_PROT_INVALID &&
+ fv->ew[i].off == ICE_FV_OFFSET_INVAL)
+ set_bit(i, hw->switch_info->prof_res_bm[off]);
+ } while (fv);
+}
+
+/**
+ * ice_pkg_buf_free
+ * @hw: pointer to the HW structure
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ *
+ * Frees a package buffer
+ */
+void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld)
+{
+ devm_kfree(ice_hw_to_dev(hw), bld);
+}
+
+/**
+ * ice_pkg_buf_reserve_section
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ * @count: the number of sections to reserve
+ *
+ * Reserves one or more section table entries in a package buffer. This routine
+ * can be called multiple times, as long as all calls are made before the first
+ * call to ice_pkg_buf_alloc_section(). Once ice_pkg_buf_alloc_section() has
+ * been called, the number of reserved sections can no longer be increased; not
+ * using all reserved sections is fine, but this will result in some wasted
+ * space in the buffer.
+ * Note: all package contents must be in Little Endian form.
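+ *
+ * A typical build sequence looks like this (sketch only; most error handling
+ * omitted):
+ *
+ *	bld = ice_pkg_buf_alloc(hw);
+ *	if (ice_pkg_buf_reserve_section(bld, 2))
+ *		goto err;
+ *	sect = ice_pkg_buf_alloc_section(bld, type, size);
+ *	... fill the section contents, then ...
+ *	status = ice_update_pkg(hw, ice_pkg_buf(bld), 1);
+ *	ice_pkg_buf_free(hw, bld);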
+ */
+static int
+ice_pkg_buf_reserve_section(struct ice_buf_build *bld, u16 count)
+{
+ struct ice_buf_hdr *buf;
+ u16 section_count;
+ u16 data_end;
+
+ if (!bld)
+ return -EINVAL;
+
+ buf = (struct ice_buf_hdr *)&bld->buf;
+
+ /* already an active section, can't increase table size */
+ section_count = le16_to_cpu(buf->section_count);
+ if (section_count > 0)
+ return -EIO;
+
+ if (bld->reserved_section_table_entries + count > ICE_MAX_S_COUNT)
+ return -EIO;
+ bld->reserved_section_table_entries += count;
+
+ data_end = le16_to_cpu(buf->data_end) +
+ flex_array_size(buf, section_entry, count);
+ buf->data_end = cpu_to_le16(data_end);
+
+ return 0;
+}
+
+/**
+ * ice_pkg_buf_alloc_section
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ * @type: the section type value
+ * @size: the size of the section to reserve (in bytes)
+ *
+ * Reserves memory in the buffer for a section's content and updates the
+ * buffer's status accordingly. This routine returns a pointer to the first
+ * byte of the section start within the buffer, which is used to fill in the
+ * section contents.
+ * Note: all package contents must be in Little Endian form.
+ */
+static void *
+ice_pkg_buf_alloc_section(struct ice_buf_build *bld, u32 type, u16 size)
+{
+ struct ice_buf_hdr *buf;
+ u16 sect_count;
+ u16 data_end;
+
+ if (!bld || !type || !size)
+ return NULL;
+
+ buf = (struct ice_buf_hdr *)&bld->buf;
+
+ /* check for enough space left in buffer */
+ data_end = le16_to_cpu(buf->data_end);
+
+ /* section start must align on 4 byte boundary */
+ data_end = ALIGN(data_end, 4);
+
+ if ((data_end + size) > ICE_MAX_S_DATA_END)
+ return NULL;
+
+ /* check for more available section table entries */
+ sect_count = le16_to_cpu(buf->section_count);
+ if (sect_count < bld->reserved_section_table_entries) {
+ void *section_ptr = ((u8 *)buf) + data_end;
+
+ buf->section_entry[sect_count].offset = cpu_to_le16(data_end);
+ buf->section_entry[sect_count].size = cpu_to_le16(size);
+ buf->section_entry[sect_count].type = cpu_to_le32(type);
+
+ data_end += size;
+ buf->data_end = cpu_to_le16(data_end);
+
+ buf->section_count = cpu_to_le16(sect_count + 1);
+ return section_ptr;
+ }
+
+ /* no free section table entries */
+ return NULL;
+}
+
+/**
+ * ice_pkg_buf_alloc_single_section
+ * @hw: pointer to the HW structure
+ * @type: the section type value
+ * @size: the size of the section to reserve (in bytes)
+ * @section: returns pointer to the section
+ *
+ * Allocates a package buffer with a single section.
+ * Note: all package contents must be in Little Endian form.
+ */
+struct ice_buf_build *
+ice_pkg_buf_alloc_single_section(struct ice_hw *hw, u32 type, u16 size,
+ void **section)
+{
+ struct ice_buf_build *buf;
+
+ if (!section)
+ return NULL;
+
+ buf = ice_pkg_buf_alloc(hw);
+ if (!buf)
+ return NULL;
+
+ if (ice_pkg_buf_reserve_section(buf, 1))
+ goto ice_pkg_buf_alloc_single_section_err;
+
+ *section = ice_pkg_buf_alloc_section(buf, type, size);
+ if (!*section)
+ goto ice_pkg_buf_alloc_single_section_err;
+
+ return buf;
+
+ice_pkg_buf_alloc_single_section_err:
+ ice_pkg_buf_free(hw, buf);
+ return NULL;
+}
+
+/**
+ * ice_pkg_buf_get_active_sections
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ *
+ * Returns the number of active sections. Before using the package buffer
+ * in an update package command, the caller should make sure that there is at
+ * least one active section - otherwise, the buffer is not legal and should
+ * not be used.
+ * Note: all package contents must be in Little Endian form.
+ */
+static u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld)
+{
+ struct ice_buf_hdr *buf;
+
+ if (!bld)
+ return 0;
+
+ buf = (struct ice_buf_hdr *)&bld->buf;
+ return le16_to_cpu(buf->section_count);
+}
+
+/**
+ * ice_pkg_buf
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ *
+ * Return a pointer to the buffer's header
+ */
+struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld)
+{
+ if (!bld)
+ return NULL;
+
+ return &bld->buf;
+}
+
+/**
+ * ice_get_open_tunnel_port - retrieve an open tunnel port
+ * @hw: pointer to the HW structure
+ * @port: returns open port
+ * @type: type of tunnel, can be TNL_LAST if it doesn't matter
+ */
+bool
+ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port,
+ enum ice_tunnel_type type)
+{
+ bool res = false;
+ u16 i;
+
+ mutex_lock(&hw->tnl_lock);
+
+ for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+ if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].port &&
+ (type == TNL_LAST || type == hw->tnl.tbl[i].type)) {
+ *port = hw->tnl.tbl[i].port;
+ res = true;
+ break;
+ }
+
+ mutex_unlock(&hw->tnl_lock);
+
+ return res;
+}
+
+/**
+ * ice_upd_dvm_boost_entry
+ * @hw: pointer to the HW structure
+ * @entry: pointer to double vlan boost entry info
+ */
+static int
+ice_upd_dvm_boost_entry(struct ice_hw *hw, struct ice_dvm_entry *entry)
+{
+ struct ice_boost_tcam_section *sect_rx, *sect_tx;
+ int status = -ENOSPC;
+ struct ice_buf_build *bld;
+ u8 val, dc, nm;
+
+ bld = ice_pkg_buf_alloc(hw);
+ if (!bld)
+ return -ENOMEM;
+
+ /* allocate 2 sections, one for Rx parser, one for Tx parser */
+ if (ice_pkg_buf_reserve_section(bld, 2))
+ goto ice_upd_dvm_boost_entry_err;
+
+ sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+ struct_size(sect_rx, tcam, 1));
+ if (!sect_rx)
+ goto ice_upd_dvm_boost_entry_err;
+ sect_rx->count = cpu_to_le16(1);
+
+ sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+ struct_size(sect_tx, tcam, 1));
+ if (!sect_tx)
+ goto ice_upd_dvm_boost_entry_err;
+ sect_tx->count = cpu_to_le16(1);
+
+ /* copy original boost entry to update package buffer */
+ memcpy(sect_rx->tcam, entry->boost_entry, sizeof(*sect_rx->tcam));
+
+ /* re-write the don't care and never match bits accordingly */
+ if (entry->enable) {
+ /* all bits are don't care */
+ val = 0x00;
+ dc = 0xFF;
+ nm = 0x00;
+ } else {
+ /* disable, one never match bit, the rest are don't care */
+ val = 0x00;
+ dc = 0xF7;
+ nm = 0x08;
+ }
+
+ ice_set_key((u8 *)&sect_rx->tcam[0].key, sizeof(sect_rx->tcam[0].key),
+ &val, NULL, &dc, &nm, 0, sizeof(u8));
+
+ /* exact copy of entry to Tx section entry */
+ memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam));
+
+ status = ice_update_pkg_no_lock(hw, ice_pkg_buf(bld), 1);
+
+ice_upd_dvm_boost_entry_err:
+ ice_pkg_buf_free(hw, bld);
+
+ return status;
+}
+
+/**
+ * ice_set_dvm_boost_entries
+ * @hw: pointer to the HW structure
+ *
+ * Enable double vlan by updating the appropriate boost tcam entries.
+ */
+int ice_set_dvm_boost_entries(struct ice_hw *hw)
+{
+ int status;
+ u16 i;
+
+ for (i = 0; i < hw->dvm_upd.count; i++) {
+ status = ice_upd_dvm_boost_entry(hw, &hw->dvm_upd.tbl[i]);
+ if (status)
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_tunnel_idx_to_entry - convert linear index to the sparse one
+ * @hw: pointer to the HW structure
+ * @type: type of tunnel
+ * @idx: linear index
+ *
+ * Stack assumes we have 2 linear tables with indexes [0, count_valid),
+ * but in reality the port table may be sparse and the types are mixed, so
+ * convert the stack index into the device index.
+ */
+static u16 ice_tunnel_idx_to_entry(struct ice_hw *hw, enum ice_tunnel_type type,
+ u16 idx)
+{
+ u16 i;
+
+ for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+ if (hw->tnl.tbl[i].valid &&
+ hw->tnl.tbl[i].type == type &&
+ idx-- == 0)
+ return i;
+
+ WARN_ON_ONCE(1);
+ return 0;
+}
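+
+/* Worked example of the conversion above: with a device table of
+ * { VXLAN, GENEVE, VXLAN }, all valid, the stack's VXLAN index 1 maps to
+ * device entry 2, because entry 1 is skipped for being a different type.
+ */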
+
+/**
+ * ice_create_tunnel - create a tunnel port entry in the parser
+ * @hw: pointer to the HW structure
+ * @index: device table entry
+ * @type: type of tunnel
+ * @port: port of tunnel to create
+ *
+ * Create a tunnel by updating the parse graph in the parser. We do that by
+ * creating a package buffer with the tunnel info and issuing an update package
+ * command.
+ */
+static int
+ice_create_tunnel(struct ice_hw *hw, u16 index,
+ enum ice_tunnel_type type, u16 port)
+{
+ struct ice_boost_tcam_section *sect_rx, *sect_tx;
+ struct ice_buf_build *bld;
+ int status = -ENOSPC;
+
+ mutex_lock(&hw->tnl_lock);
+
+ bld = ice_pkg_buf_alloc(hw);
+ if (!bld) {
+ status = -ENOMEM;
+ goto ice_create_tunnel_end;
+ }
+
+ /* allocate 2 sections, one for Rx parser, one for Tx parser */
+ if (ice_pkg_buf_reserve_section(bld, 2))
+ goto ice_create_tunnel_err;
+
+ sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+ struct_size(sect_rx, tcam, 1));
+ if (!sect_rx)
+ goto ice_create_tunnel_err;
+ sect_rx->count = cpu_to_le16(1);
+
+ sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+ struct_size(sect_tx, tcam, 1));
+ if (!sect_tx)
+ goto ice_create_tunnel_err;
+ sect_tx->count = cpu_to_le16(1);
+
+ /* copy original boost entry to update package buffer */
+ memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry,
+ sizeof(*sect_rx->tcam));
+
+ /* over-write the never-match dest port key bits with the encoded port
+ * bits
+ */
+ ice_set_key((u8 *)&sect_rx->tcam[0].key, sizeof(sect_rx->tcam[0].key),
+ (u8 *)&port, NULL, NULL, NULL,
+ (u16)offsetof(struct ice_boost_key_value, hv_dst_port_key),
+ sizeof(sect_rx->tcam[0].key.key.hv_dst_port_key));
+
+ /* exact copy of entry to Tx section entry */
+ memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam));
+
+ status = ice_update_pkg(hw, ice_pkg_buf(bld), 1);
+ if (!status)
+ hw->tnl.tbl[index].port = port;
+
+ice_create_tunnel_err:
+ ice_pkg_buf_free(hw, bld);
+
+ice_create_tunnel_end:
+ mutex_unlock(&hw->tnl_lock);
+
+ return status;
+}
+
+/**
+ * ice_destroy_tunnel - destroy a tunnel port entry in the parser
+ * @hw: pointer to the HW structure
+ * @index: device table entry
+ * @type: type of tunnel
+ * @port: port of tunnel to destroy
+ *
+ * Destroys a tunnel by creating an update package buffer targeting the
+ * specific update requested and then performing an update package command.
+ */
+static int
+ice_destroy_tunnel(struct ice_hw *hw, u16 index, enum ice_tunnel_type type,
+ u16 port)
+{
+ struct ice_boost_tcam_section *sect_rx, *sect_tx;
+ struct ice_buf_build *bld;
+ int status = -ENOSPC;
+
+ mutex_lock(&hw->tnl_lock);
+
+ if (WARN_ON(!hw->tnl.tbl[index].valid ||
+ hw->tnl.tbl[index].type != type ||
+ hw->tnl.tbl[index].port != port)) {
+ status = -EIO;
+ goto ice_destroy_tunnel_end;
+ }
+
+ bld = ice_pkg_buf_alloc(hw);
+ if (!bld) {
+ status = -ENOMEM;
+ goto ice_destroy_tunnel_end;
+ }
+
+ /* allocate 2 sections, one for Rx parser, one for Tx parser */
+ if (ice_pkg_buf_reserve_section(bld, 2))
+ goto ice_destroy_tunnel_err;
+
+ sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+ struct_size(sect_rx, tcam, 1));
+ if (!sect_rx)
+ goto ice_destroy_tunnel_err;
+ sect_rx->count = cpu_to_le16(1);
+
+ sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+ struct_size(sect_tx, tcam, 1));
+ if (!sect_tx)
+ goto ice_destroy_tunnel_err;
+ sect_tx->count = cpu_to_le16(1);
+
+ /* copy original boost entry to update package buffer, one copy to Rx
+ * section, another copy to the Tx section
+ */
+ memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry,
+ sizeof(*sect_rx->tcam));
+ memcpy(sect_tx->tcam, hw->tnl.tbl[index].boost_entry,
+ sizeof(*sect_tx->tcam));
+
+ status = ice_update_pkg(hw, ice_pkg_buf(bld), 1);
+ if (!status)
+ hw->tnl.tbl[index].port = 0;
+
+ice_destroy_tunnel_err:
+ ice_pkg_buf_free(hw, bld);
+
+ice_destroy_tunnel_end:
+ mutex_unlock(&hw->tnl_lock);
+
+ return status;
+}
+
+int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
+ unsigned int idx, struct udp_tunnel_info *ti)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ enum ice_tunnel_type tnl_type;
+ int status;
+ u16 index;
+
+ tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE;
+ index = ice_tunnel_idx_to_entry(&pf->hw, tnl_type, idx);
+
+ status = ice_create_tunnel(&pf->hw, index, tnl_type, ntohs(ti->port));
+ if (status) {
+ netdev_err(netdev, "Error adding UDP tunnel - %d\n",
+ status);
+ return -EIO;
+ }
+
+ udp_tunnel_nic_set_port_priv(netdev, table, idx, index);
+ return 0;
+}
+
+int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
+ unsigned int idx, struct udp_tunnel_info *ti)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ enum ice_tunnel_type tnl_type;
+ int status;
+
+ tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE;
+
+ status = ice_destroy_tunnel(&pf->hw, ti->hw_priv, tnl_type,
+ ntohs(ti->port));
+ if (status) {
+ netdev_err(netdev, "Error removing UDP tunnel - %d\n",
+ status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_find_prot_off - find prot ID and offset pair, based on prof and FV index
+ * @hw: pointer to the hardware structure
+ * @blk: hardware block
+ * @prof: profile ID
+ * @fv_idx: field vector word index
+ * @prot: variable to receive the protocol ID
+ * @off: variable to receive the protocol offset
+ */
+int
+ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 fv_idx,
+ u8 *prot, u16 *off)
+{
+ struct ice_fv_word *fv_ext;
+
+ if (prof >= hw->blk[blk].es.count)
+ return -EINVAL;
+
+ if (fv_idx >= hw->blk[blk].es.fvw)
+ return -EINVAL;
+
+ fv_ext = hw->blk[blk].es.t + (prof * hw->blk[blk].es.fvw);
+
+ *prot = fv_ext[fv_idx].prot_id;
+ *off = fv_ext[fv_idx].off;
+
+ return 0;
+}
+
+/* PTG Management */
+
+/**
+ * ice_ptg_find_ptype - Search for packet type group using packet type (ptype)
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to search for
+ * @ptg: pointer to variable that receives the PTG
+ *
+ * This function will search the PTGs for a particular ptype, returning the
+ * PTG ID that contains it through the PTG parameter, with the value of
+ * ICE_DEFAULT_PTG (0) meaning it is part of the default PTG.
+ */
+static int
+ice_ptg_find_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 *ptg)
+{
+ if (ptype >= ICE_XLT1_CNT || !ptg)
+ return -EINVAL;
+
+ *ptg = hw->blk[blk].xlt1.ptypes[ptype].ptg;
+ return 0;
+}
+
+/**
+ * ice_ptg_alloc_val - Allocates a new packet type group ID by value
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptg: the PTG to allocate
+ *
+ * This function allocates a given packet type group ID specified by the PTG
+ * parameter.
+ */
+static void ice_ptg_alloc_val(struct ice_hw *hw, enum ice_block blk, u8 ptg)
+{
+ hw->blk[blk].xlt1.ptg_tbl[ptg].in_use = true;
+}
+
+/**
+ * ice_ptg_remove_ptype - Removes ptype from a particular packet type group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to remove
+ * @ptg: the PTG to remove the ptype from
+ *
+ * This function will remove the ptype from the specific PTG, and move it to
+ * the default PTG (ICE_DEFAULT_PTG).
+ */
+static int
+ice_ptg_remove_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 ptg)
+{
+ struct ice_ptg_ptype **ch;
+ struct ice_ptg_ptype *p;
+
+ if (ptype > ICE_XLT1_CNT - 1)
+ return -EINVAL;
+
+ if (!hw->blk[blk].xlt1.ptg_tbl[ptg].in_use)
+ return -ENOENT;
+
+ /* Should not happen if .in_use is set, bad config */
+ if (!hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype)
+ return -EIO;
+
+ /* find the ptype within this PTG, and bypass the link over it */
+ p = hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+ ch = &hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+ while (p) {
+ if (ptype == (p - hw->blk[blk].xlt1.ptypes)) {
+ *ch = p->next_ptype;
+ break;
+ }
+
+ ch = &p->next_ptype;
+ p = p->next_ptype;
+ }
+
+ hw->blk[blk].xlt1.ptypes[ptype].ptg = ICE_DEFAULT_PTG;
+ hw->blk[blk].xlt1.ptypes[ptype].next_ptype = NULL;
+
+ return 0;
+}
+
+/**
+ * ice_ptg_add_mv_ptype - Adds/moves ptype to a particular packet type group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to add or move
+ * @ptg: the PTG to add or move the ptype to
+ *
+ * This function will either add or move a ptype to a particular PTG depending
+ * on whether the ptype is already part of another group. Note that using a
+ * destination PTG ID of ICE_DEFAULT_PTG (0) will move the ptype to the
+ * default PTG.
+ */
+static int
+ice_ptg_add_mv_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 ptg)
+{
+ u8 original_ptg;
+ int status;
+
+ if (ptype > ICE_XLT1_CNT - 1)
+ return -EINVAL;
+
+ if (!hw->blk[blk].xlt1.ptg_tbl[ptg].in_use && ptg != ICE_DEFAULT_PTG)
+ return -ENOENT;
+
+ status = ice_ptg_find_ptype(hw, blk, ptype, &original_ptg);
+ if (status)
+ return status;
+
+ /* Is ptype already in the correct PTG? */
+ if (original_ptg == ptg)
+ return 0;
+
+ /* Remove from original PTG and move back to the default PTG */
+ if (original_ptg != ICE_DEFAULT_PTG)
+ ice_ptg_remove_ptype(hw, blk, ptype, original_ptg);
+
+ /* Moving to default PTG? Then we're done with this request */
+ if (ptg == ICE_DEFAULT_PTG)
+ return 0;
+
+ /* Add ptype to PTG at beginning of list */
+ hw->blk[blk].xlt1.ptypes[ptype].next_ptype =
+ hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+ hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype =
+ &hw->blk[blk].xlt1.ptypes[ptype];
+
+ hw->blk[blk].xlt1.ptypes[ptype].ptg = ptg;
+ hw->blk[blk].xlt1.t[ptype] = ptg;
+
+ return 0;
+}
+
+/* Block / table size info */
+struct ice_blk_size_details {
+ u16 xlt1; /* # XLT1 entries */
+ u16 xlt2; /* # XLT2 entries */
+ u16 prof_tcam; /* # profile ID TCAM entries */
+ u16 prof_id; /* # profile IDs */
+ u8 prof_cdid_bits; /* # CDID one-hot bits used in key */
+ u16 prof_redir; /* # profile redirection entries */
+ u16 es; /* # extraction sequence entries */
+ u16 fvw; /* # field vector words */
+ u8 overwrite; /* overwrite existing entries allowed */
+ u8 reverse; /* reverse FV order */
+};
+
+static const struct ice_blk_size_details blk_sizes[ICE_BLK_COUNT] = {
+ /**
+ * Table Definitions
+ * XLT1 - Number of entries in XLT1 table
+ * XLT2 - Number of entries in XLT2 table
+ * TCAM - Number of entries Profile ID TCAM table
+ * CDID - Control Domain ID of the hardware block
+ * PRED - Number of entries in the Profile Redirection Table
+ * FV - Number of entries in the Field Vector
+ * FVW - Width (in WORDs) of the Field Vector
+ * OVR - Overwrite existing table entries
+ * REV - Reverse FV
+ */
+ /* XLT1 , XLT2 ,TCAM, PID,CDID,PRED, FV, FVW */
+ /* Overwrite , Reverse FV */
+ /* SW */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 256, 0, 256, 256, 48,
+ false, false },
+ /* ACL */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128, 0, 128, 128, 32,
+ false, false },
+ /* FD */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128, 0, 128, 128, 24,
+ false, true },
+ /* RSS */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128, 0, 128, 128, 24,
+ true, true },
+ /* PE */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 64, 32, 0, 32, 32, 24,
+ false, false },
+};
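+
+/* Example reading of the table above: for the FD block the extraction
+ * sequence has 128 entries of 24 field-vector words each, so the ES table
+ * allocated in ice_init_hw_tbls() holds 128 * 24 = 3072 ice_fv_word entries.
+ */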
+
+enum ice_sid_all {
+ ICE_SID_XLT1_OFF = 0,
+ ICE_SID_XLT2_OFF,
+ ICE_SID_PR_OFF,
+ ICE_SID_PR_REDIR_OFF,
+ ICE_SID_ES_OFF,
+ ICE_SID_OFF_COUNT,
+};
+
+/* Characteristic handling */
+
+/**
+ * ice_match_prop_lst - determine if properties of two lists match
+ * @list1: first properties list
+ * @list2: second properties list
+ *
+ * Count, cookies and the order must match in order to be considered equivalent.
+ */
+static bool
+ice_match_prop_lst(struct list_head *list1, struct list_head *list2)
+{
+ struct ice_vsig_prof *tmp1;
+ struct ice_vsig_prof *tmp2;
+ u16 chk_count = 0;
+ u16 count = 0;
+
+ /* compare counts */
+ list_for_each_entry(tmp1, list1, list)
+ count++;
+ list_for_each_entry(tmp2, list2, list)
+ chk_count++;
+ /* cppcheck-suppress knownConditionTrueFalse */
+ if (!count || count != chk_count)
+ return false;
+
+ tmp1 = list_first_entry(list1, struct ice_vsig_prof, list);
+ tmp2 = list_first_entry(list2, struct ice_vsig_prof, list);
+
+ /* profile cookies must compare, and in the exact same order to take
+ * into account priority
+ */
+ while (count--) {
+ if (tmp2->profile_cookie != tmp1->profile_cookie)
+ return false;
+
+ tmp1 = list_next_entry(tmp1, list);
+ tmp2 = list_next_entry(tmp2, list);
+ }
+
+ return true;
+}
+
+/* VSIG Management */
+
+/**
+ * ice_vsig_find_vsi - find a VSIG that contains a specified VSI
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI of interest
+ * @vsig: pointer to receive the VSI group
+ *
+ * This function will look up the VSI entry in the XLT2 list and return
+ * the VSI group it is associated with.
+ */
+static int
+ice_vsig_find_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 *vsig)
+{
+ if (!vsig || vsi >= ICE_MAX_VSI)
+ return -EINVAL;
+
+ /* As long as there's a default or valid VSIG associated with the input
+ * VSI, the function returns success. Any handling of VSIGs will be
+ * done by the following add, update or remove functions.
+ */
+ *vsig = hw->blk[blk].xlt2.vsis[vsi].vsig;
+
+ return 0;
+}
+
+/**
+ * ice_vsig_alloc_val - allocate a new VSIG by value
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: the VSIG to allocate
+ *
+ * This function will allocate a given VSIG specified by the VSIG parameter.
+ */
+static u16 ice_vsig_alloc_val(struct ice_hw *hw, enum ice_block blk, u16 vsig)
+{
+ u16 idx = vsig & ICE_VSIG_IDX_M;
+
+ if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) {
+ INIT_LIST_HEAD(&hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst);
+ hw->blk[blk].xlt2.vsig_tbl[idx].in_use = true;
+ }
+
+ return ICE_VSIG_VALUE(idx, hw->pf_id);
+}
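+
+/* Note on the return value: ICE_VSIG_VALUE() combines the table index with
+ * the owning PF ID in the upper bits of the 16-bit handle; callers recover
+ * the index again by masking with ICE_VSIG_IDX_M, as done throughout this
+ * file.
+ */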
+
+/**
+ * ice_vsig_alloc - Finds a free entry and allocates a new VSIG
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ *
+ * This function will iterate through the VSIG list, mark the first
+ * unused entry as used for the new VSIG, and return that value.
+ */
+static u16 ice_vsig_alloc(struct ice_hw *hw, enum ice_block blk)
+{
+ u16 i;
+
+ for (i = 1; i < ICE_MAX_VSIGS; i++)
+ if (!hw->blk[blk].xlt2.vsig_tbl[i].in_use)
+ return ice_vsig_alloc_val(hw, blk, i);
+
+ return ICE_DEFAULT_VSIG;
+}
+
+/**
+ * ice_find_dup_props_vsig - find VSI group with a specified set of properties
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @chs: characteristic list
+ * @vsig: returns the VSIG with the matching profiles, if found
+ *
+ * Each VSIG is associated with a characteristic set; i.e. all VSIs under
+ * a group have the same characteristic set. To check whether a VSIG exists
+ * that has the same characteristics as the input, this function iterates
+ * through the XLT2 list and returns the VSIG with a matching configuration.
+ * To make sure that priorities are accounted for, the lists must match
+ * exactly, including the order in which the characteristics are listed.
+ */
+static int
+ice_find_dup_props_vsig(struct ice_hw *hw, enum ice_block blk,
+ struct list_head *chs, u16 *vsig)
+{
+ struct ice_xlt2 *xlt2 = &hw->blk[blk].xlt2;
+ u16 i;
+
+ for (i = 0; i < xlt2->count; i++)
+ if (xlt2->vsig_tbl[i].in_use &&
+ ice_match_prop_lst(chs, &xlt2->vsig_tbl[i].prop_lst)) {
+ *vsig = ICE_VSIG_VALUE(i, hw->pf_id);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * ice_vsig_free - free VSI group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: VSIG to remove
+ *
+ * The function will remove all VSIs associated with the input VSIG, move
+ * them to the DEFAULT_VSIG, and mark the VSIG as available.
+ */
+static int ice_vsig_free(struct ice_hw *hw, enum ice_block blk, u16 vsig)
+{
+ struct ice_vsig_prof *dtmp, *del;
+ struct ice_vsig_vsi *vsi_cur;
+ u16 idx;
+
+ idx = vsig & ICE_VSIG_IDX_M;
+ if (idx >= ICE_MAX_VSIGS)
+ return -EINVAL;
+
+ if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use)
+ return -ENOENT;
+
+ hw->blk[blk].xlt2.vsig_tbl[idx].in_use = false;
+
+ vsi_cur = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+ /* If the VSIG has at least 1 VSI then iterate through the
+ * list and remove the VSIs before deleting the group.
+ */
+ if (vsi_cur) {
+ /* remove all VSIs associated with this VSIG XLT2 entry */
+ do {
+ struct ice_vsig_vsi *tmp = vsi_cur->next_vsi;
+
+ vsi_cur->vsig = ICE_DEFAULT_VSIG;
+ vsi_cur->changed = 1;
+ vsi_cur->next_vsi = NULL;
+ vsi_cur = tmp;
+ } while (vsi_cur);
+
+ /* NULL terminate head of VSI list */
+ hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi = NULL;
+ }
+
+ /* free characteristic list */
+ list_for_each_entry_safe(del, dtmp,
+ &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+ list) {
+ list_del(&del->list);
+ devm_kfree(ice_hw_to_dev(hw), del);
+ }
+
+ /* if VSIG characteristic list was cleared for reset
+ * re-initialize the list head
+ */
+ INIT_LIST_HEAD(&hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst);
+
+ return 0;
+}
+
+/**
+ * ice_vsig_remove_vsi - remove VSI from VSIG
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI to remove
+ * @vsig: VSI group to remove from
+ *
+ * The function will remove the input VSI from its VSI group and move it
+ * to the DEFAULT_VSIG.
+ */
+static int
+ice_vsig_remove_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig)
+{
+ struct ice_vsig_vsi **vsi_head, *vsi_cur, *vsi_tgt;
+ u16 idx;
+
+ idx = vsig & ICE_VSIG_IDX_M;
+
+ if (vsi >= ICE_MAX_VSI || idx >= ICE_MAX_VSIGS)
+ return -EINVAL;
+
+ if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use)
+ return -ENOENT;
+
+ /* entry already in default VSIG, don't have to remove */
+ if (idx == ICE_DEFAULT_VSIG)
+ return 0;
+
+ vsi_head = &hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+ if (!(*vsi_head))
+ return -EIO;
+
+ vsi_tgt = &hw->blk[blk].xlt2.vsis[vsi];
+ vsi_cur = (*vsi_head);
+
+ /* iterate the VSI list, skip over the entry to be removed */
+ while (vsi_cur) {
+ if (vsi_tgt == vsi_cur) {
+ (*vsi_head) = vsi_cur->next_vsi;
+ break;
+ }
+ vsi_head = &vsi_cur->next_vsi;
+ vsi_cur = vsi_cur->next_vsi;
+ }
+
+ /* verify if VSI was removed from group list */
+ if (!vsi_cur)
+ return -ENOENT;
+
+ vsi_cur->vsig = ICE_DEFAULT_VSIG;
+ vsi_cur->changed = 1;
+ vsi_cur->next_vsi = NULL;
+
+ return 0;
+}
+
+/**
+ * ice_vsig_add_mv_vsi - add or move a VSI to a VSI group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI to move
+ * @vsig: destination VSI group
+ *
+ * This function will move or add the input VSI to the target VSIG.
+ * The function will find the original VSIG the VSI belongs to and
+ * move the entry to the DEFAULT_VSIG, update the original VSIG and
+ * then move entry to the new VSIG.
+ */
+static int
+ice_vsig_add_mv_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig)
+{
+ struct ice_vsig_vsi *tmp;
+ u16 orig_vsig, idx;
+ int status;
+
+ idx = vsig & ICE_VSIG_IDX_M;
+
+ if (vsi >= ICE_MAX_VSI || idx >= ICE_MAX_VSIGS)
+ return -EINVAL;
+
+ /* if the VSIG is not in use and is not the default VSIG, this VSIG
+ * doesn't exist.
+ */
+ if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use &&
+ vsig != ICE_DEFAULT_VSIG)
+ return -ENOENT;
+
+ status = ice_vsig_find_vsi(hw, blk, vsi, &orig_vsig);
+ if (status)
+ return status;
+
+ /* no update required if vsigs match */
+ if (orig_vsig == vsig)
+ return 0;
+
+ if (orig_vsig != ICE_DEFAULT_VSIG) {
+ /* remove entry from orig_vsig and add to default VSIG */
+ status = ice_vsig_remove_vsi(hw, blk, vsi, orig_vsig);
+ if (status)
+ return status;
+ }
+
+ if (idx == ICE_DEFAULT_VSIG)
+ return 0;
+
+ /* Create VSI entry and add VSIG and prop_mask values */
+ hw->blk[blk].xlt2.vsis[vsi].vsig = vsig;
+ hw->blk[blk].xlt2.vsis[vsi].changed = 1;
+
+ /* Add new entry to the head of the VSIG list */
+ tmp = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+ hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi =
+ &hw->blk[blk].xlt2.vsis[vsi];
+ hw->blk[blk].xlt2.vsis[vsi].next_vsi = tmp;
+ hw->blk[blk].xlt2.t[vsi] = vsig;
+
+ return 0;
+}
+
+/**
+ * ice_prof_has_mask_idx - determine if profile index masking is identical
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @prof: profile to check
+ * @idx: profile index to check
+ * @mask: mask to match
+ */
+static bool
+ice_prof_has_mask_idx(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 idx,
+ u16 mask)
+{
+ bool expect_no_mask = false;
+ bool found = false;
+ bool match = false;
+ u16 i;
+
+ /* If mask is 0x0000 or 0xffff, then there is no masking */
+ if (mask == 0 || mask == 0xffff)
+ expect_no_mask = true;
+
+ /* Scan the enabled masks on this profile, for the specified idx */
+ for (i = hw->blk[blk].masks.first; i < hw->blk[blk].masks.first +
+ hw->blk[blk].masks.count; i++)
+ if (hw->blk[blk].es.mask_ena[prof] & BIT(i))
+ if (hw->blk[blk].masks.masks[i].in_use &&
+ hw->blk[blk].masks.masks[i].idx == idx) {
+ found = true;
+ if (hw->blk[blk].masks.masks[i].mask == mask)
+ match = true;
+ break;
+ }
+
+ if (expect_no_mask) {
+ if (found)
+ return false;
+ } else {
+ if (!match)
+ return false;
+ }
+
+ return true;
+}
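+
+/* Decision summary for the check above:
+ *
+ * requested mask     enabled mask for idx    result
+ * 0x0000 / 0xffff    none found              identical (true)
+ * 0x0000 / 0xffff    found                   not identical (false)
+ * other              exact match             identical (true)
+ * other              none or different       not identical (false)
+ */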
+
+/**
+ * ice_prof_has_mask - determine if profile masking is identical
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @prof: profile to check
+ * @masks: masks to match
+ */
+static bool
+ice_prof_has_mask(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 *masks)
+{
+ u16 i;
+
+ /* es->mask_ena[prof] will have the mask */
+ for (i = 0; i < hw->blk[blk].es.fvw; i++)
+ if (!ice_prof_has_mask_idx(hw, blk, prof, i, masks[i]))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_find_prof_id_with_mask - find profile ID for a given field vector
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @fv: field vector to search for
+ * @masks: masks for FV
+ * @prof_id: receives the profile ID
+ */
+static int
+ice_find_prof_id_with_mask(struct ice_hw *hw, enum ice_block blk,
+ struct ice_fv_word *fv, u16 *masks, u8 *prof_id)
+{
+ struct ice_es *es = &hw->blk[blk].es;
+ u8 i;
+
+ /* For FD, we don't want to reuse an existing profile with the same
+ * field vector and mask, as doing so would cause rule interference.
+ */
+ if (blk == ICE_BLK_FD)
+ return -ENOENT;
+
+ for (i = 0; i < (u8)es->count; i++) {
+ u16 off = i * es->fvw;
+
+ if (memcmp(&es->t[off], fv, es->fvw * sizeof(*fv)))
+ continue;
+
+ /* check if masks settings are the same for this profile */
+ if (masks && !ice_prof_has_mask(hw, blk, i, masks))
+ continue;
+
+ *prof_id = i;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * ice_prof_id_rsrc_type - get profile ID resource type for a block type
+ * @blk: the block type
+ * @rsrc_type: pointer to variable to receive the resource type
+ */
+static bool ice_prof_id_rsrc_type(enum ice_block blk, u16 *rsrc_type)
+{
+ switch (blk) {
+ case ICE_BLK_FD:
+ *rsrc_type = ICE_AQC_RES_TYPE_FD_PROF_BLDR_PROFID;
+ break;
+ case ICE_BLK_RSS:
+ *rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID;
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+/**
+ * ice_tcam_ent_rsrc_type - get TCAM entry resource type for a block type
+ * @blk: the block type
+ * @rsrc_type: pointer to variable to receive the resource type
+ */
+static bool ice_tcam_ent_rsrc_type(enum ice_block blk, u16 *rsrc_type)
+{
+ switch (blk) {
+ case ICE_BLK_FD:
+ *rsrc_type = ICE_AQC_RES_TYPE_FD_PROF_BLDR_TCAM;
+ break;
+ case ICE_BLK_RSS:
+ *rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM;
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+/**
+ * ice_alloc_tcam_ent - allocate hardware TCAM entry
+ * @hw: pointer to the HW struct
+ * @blk: the block to allocate the TCAM for
+ * @btm: true to allocate from bottom of table, false to allocate from top
+ * @tcam_idx: pointer to variable to receive the TCAM entry
+ *
+ * This function allocates a new entry in a Profile ID TCAM for a specific
+ * block.
+ */
+static int
+ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, bool btm,
+ u16 *tcam_idx)
+{
+ u16 res_type;
+
+ if (!ice_tcam_ent_rsrc_type(blk, &res_type))
+ return -EINVAL;
+
+ return ice_alloc_hw_res(hw, res_type, 1, btm, tcam_idx);
+}
+
+/**
+ * ice_free_tcam_ent - free hardware TCAM entry
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the TCAM entry
+ * @tcam_idx: the TCAM entry to free
+ *
+ * This function frees an entry in a Profile ID TCAM for a specific block.
+ */
+static int
+ice_free_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 tcam_idx)
+{
+ u16 res_type;
+
+ if (!ice_tcam_ent_rsrc_type(blk, &res_type))
+ return -EINVAL;
+
+ return ice_free_hw_res(hw, res_type, 1, &tcam_idx);
+}
+
+/**
+ * ice_alloc_prof_id - allocate profile ID
+ * @hw: pointer to the HW struct
+ * @blk: the block to allocate the profile ID for
+ * @prof_id: pointer to variable to receive the profile ID
+ *
+ * This function allocates a new profile ID, which also corresponds to a Field
+ * Vector (Extraction Sequence) entry.
+ */
+static int ice_alloc_prof_id(struct ice_hw *hw, enum ice_block blk, u8 *prof_id)
+{
+ u16 res_type;
+ u16 get_prof;
+ int status;
+
+ if (!ice_prof_id_rsrc_type(blk, &res_type))
+ return -EINVAL;
+
+ status = ice_alloc_hw_res(hw, res_type, 1, false, &get_prof);
+ if (!status)
+ *prof_id = (u8)get_prof;
+
+ return status;
+}
+
+/**
+ * ice_free_prof_id - free profile ID
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the profile ID
+ * @prof_id: the profile ID to free
+ *
+ * This function frees a profile ID, which also corresponds to a Field Vector.
+ */
+static int ice_free_prof_id(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
+{
+ u16 tmp_prof_id = (u16)prof_id;
+ u16 res_type;
+
+ if (!ice_prof_id_rsrc_type(blk, &res_type))
+ return -EINVAL;
+
+ return ice_free_hw_res(hw, res_type, 1, &tmp_prof_id);
+}
+
+/**
+ * ice_prof_inc_ref - increment reference count for profile
+ * @hw: pointer to the HW struct
+ * @blk: the block containing the profile ID
+ * @prof_id: the profile ID for which to increment the reference count
+ */
+static int ice_prof_inc_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
+{
+ if (prof_id > hw->blk[blk].es.count)
+ return -EINVAL;
+
+ hw->blk[blk].es.ref_count[prof_id]++;
+
+ return 0;
+}
+
+/**
+ * ice_write_prof_mask_reg - write profile mask register
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @mask_idx: mask index
+ * @idx: index of the FV which will use the mask
+ * @mask: the 16-bit mask
+ */
+static void
+ice_write_prof_mask_reg(struct ice_hw *hw, enum ice_block blk, u16 mask_idx,
+ u16 idx, u16 mask)
+{
+ u32 offset;
+ u32 val;
+
+ switch (blk) {
+ case ICE_BLK_RSS:
+ offset = GLQF_HMASK(mask_idx);
+ val = (idx << GLQF_HMASK_MSK_INDEX_S) & GLQF_HMASK_MSK_INDEX_M;
+ val |= (mask << GLQF_HMASK_MASK_S) & GLQF_HMASK_MASK_M;
+ break;
+ case ICE_BLK_FD:
+ offset = GLQF_FDMASK(mask_idx);
+ val = (idx << GLQF_FDMASK_MSK_INDEX_S) & GLQF_FDMASK_MSK_INDEX_M;
+ val |= (mask << GLQF_FDMASK_MASK_S) & GLQF_FDMASK_MASK_M;
+ break;
+ default:
+ ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n",
+ blk);
+ return;
+ }
+
+ wr32(hw, offset, val);
+ ice_debug(hw, ICE_DBG_PKG, "write mask, blk %d (%d): %x = %x\n",
+ blk, idx, offset, val);
+}
+
+/**
+ * ice_write_prof_mask_enable_res - write profile mask enable register
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ * @enable_mask: enable mask
+ */
+static void
+ice_write_prof_mask_enable_res(struct ice_hw *hw, enum ice_block blk,
+ u16 prof_id, u32 enable_mask)
+{
+ u32 offset;
+
+ switch (blk) {
+ case ICE_BLK_RSS:
+ offset = GLQF_HMASK_SEL(prof_id);
+ break;
+ case ICE_BLK_FD:
+ offset = GLQF_FDMASK_SEL(prof_id);
+ break;
+ default:
+ ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n",
+ blk);
+ return;
+ }
+
+ wr32(hw, offset, enable_mask);
+ ice_debug(hw, ICE_DBG_PKG, "write mask enable, blk %d (%d): %x = %x\n",
+ blk, prof_id, offset, enable_mask);
+}
+
+/**
+ * ice_init_prof_masks - initialize profile masks for a block
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ */
+static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk)
+{
+ u16 per_pf;
+ u16 i;
+
+ mutex_init(&hw->blk[blk].masks.lock);
+
+ per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs;
+
+ hw->blk[blk].masks.count = per_pf;
+ hw->blk[blk].masks.first = hw->pf_id * per_pf;
+
+ memset(hw->blk[blk].masks.masks, 0, sizeof(hw->blk[blk].masks.masks));
+
+ for (i = hw->blk[blk].masks.first;
+ i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++)
+ ice_write_prof_mask_reg(hw, blk, i, 0, 0);
+}
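+
+/* Example of the per-PF partitioning above (editorial, assuming
+ * ICE_PROF_MASK_COUNT is 16): on a device with 8 functions each PF owns
+ * 16 / 8 = 2 mask registers, so PF 3 owns mask indices 6 and 7.
+ */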
+
+/**
+ * ice_init_all_prof_masks - initialize all prof masks
+ * @hw: pointer to the HW struct
+ */
+static void ice_init_all_prof_masks(struct ice_hw *hw)
+{
+ ice_init_prof_masks(hw, ICE_BLK_RSS);
+ ice_init_prof_masks(hw, ICE_BLK_FD);
+}
+
+/**
+ * ice_alloc_prof_mask - allocate profile mask
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @idx: index of FV which will use the mask
+ * @mask: the 16-bit mask
+ * @mask_idx: variable to receive the mask index
+ */
+static int
+ice_alloc_prof_mask(struct ice_hw *hw, enum ice_block blk, u16 idx, u16 mask,
+ u16 *mask_idx)
+{
+ bool found_unused = false, found_copy = false;
+ u16 unused_idx = 0, copy_idx = 0;
+ int status = -ENOSPC;
+ u16 i;
+
+ if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+ return -EINVAL;
+
+ mutex_lock(&hw->blk[blk].masks.lock);
+
+ for (i = hw->blk[blk].masks.first;
+ i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++)
+ if (hw->blk[blk].masks.masks[i].in_use) {
+ /* if mask is in use and it exactly duplicates the
+ * desired mask and index, then it can be reused
+ */
+ if (hw->blk[blk].masks.masks[i].mask == mask &&
+ hw->blk[blk].masks.masks[i].idx == idx) {
+ found_copy = true;
+ copy_idx = i;
+ break;
+ }
+ } else {
+ /* save off unused index, but keep searching in case
+ * there is an exact match later on
+ */
+ if (!found_unused) {
+ found_unused = true;
+ unused_idx = i;
+ }
+ }
+
+ if (found_copy)
+ i = copy_idx;
+ else if (found_unused)
+ i = unused_idx;
+ else
+ goto err_ice_alloc_prof_mask;
+
+ /* update mask for a new entry */
+ if (found_unused) {
+ hw->blk[blk].masks.masks[i].in_use = true;
+ hw->blk[blk].masks.masks[i].mask = mask;
+ hw->blk[blk].masks.masks[i].idx = idx;
+ hw->blk[blk].masks.masks[i].ref = 0;
+ ice_write_prof_mask_reg(hw, blk, i, idx, mask);
+ }
+
+ hw->blk[blk].masks.masks[i].ref++;
+ *mask_idx = i;
+ status = 0;
+
+err_ice_alloc_prof_mask:
+ mutex_unlock(&hw->blk[blk].masks.lock);
+
+ return status;
+}
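+
+/* Usage note: allocation is reference counted - a second
+ * ice_alloc_prof_mask() call with the same idx/mask pair returns the
+ * existing entry and bumps its ref count, and ice_free_prof_mask() below
+ * only clears the register once that count drops to zero.
+ */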
+
+/**
+ * ice_free_prof_mask - free profile mask
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @mask_idx: index of mask
+ */
+static int
+ice_free_prof_mask(struct ice_hw *hw, enum ice_block blk, u16 mask_idx)
+{
+ if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+ return -EINVAL;
+
+ if (!(mask_idx >= hw->blk[blk].masks.first &&
+ mask_idx < hw->blk[blk].masks.first + hw->blk[blk].masks.count))
+ return -ENOENT;
+
+ mutex_lock(&hw->blk[blk].masks.lock);
+
+ if (!hw->blk[blk].masks.masks[mask_idx].in_use)
+ goto exit_ice_free_prof_mask;
+
+ if (hw->blk[blk].masks.masks[mask_idx].ref > 1) {
+ hw->blk[blk].masks.masks[mask_idx].ref--;
+ goto exit_ice_free_prof_mask;
+ }
+
+ /* remove mask */
+ hw->blk[blk].masks.masks[mask_idx].in_use = false;
+ hw->blk[blk].masks.masks[mask_idx].mask = 0;
+ hw->blk[blk].masks.masks[mask_idx].idx = 0;
+
+ /* update mask as unused entry */
+ ice_debug(hw, ICE_DBG_PKG, "Free mask, blk %d, mask %d\n", blk,
+ mask_idx);
+ ice_write_prof_mask_reg(hw, blk, mask_idx, 0, 0);
+
+exit_ice_free_prof_mask:
+ mutex_unlock(&hw->blk[blk].masks.lock);
+
+ return 0;
+}
+
+/**
+ * ice_free_prof_masks - free all profile masks for a profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ */
+static int
+ice_free_prof_masks(struct ice_hw *hw, enum ice_block blk, u16 prof_id)
+{
+ u32 mask_bm;
+ u16 i;
+
+ if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+ return -EINVAL;
+
+ mask_bm = hw->blk[blk].es.mask_ena[prof_id];
+ for (i = 0; i < BITS_PER_BYTE * sizeof(mask_bm); i++)
+ if (mask_bm & BIT(i))
+ ice_free_prof_mask(hw, blk, i);
+
+ return 0;
+}
+
+/**
+ * ice_shutdown_prof_masks - clear profile masks and destroy the masking lock
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ *
+ * This should be called before unloading the driver
+ */
+static void ice_shutdown_prof_masks(struct ice_hw *hw, enum ice_block blk)
+{
+ u16 i;
+
+ mutex_lock(&hw->blk[blk].masks.lock);
+
+ for (i = hw->blk[blk].masks.first;
+ i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++) {
+ ice_write_prof_mask_reg(hw, blk, i, 0, 0);
+
+ hw->blk[blk].masks.masks[i].in_use = false;
+ hw->blk[blk].masks.masks[i].idx = 0;
+ hw->blk[blk].masks.masks[i].mask = 0;
+ }
+
+ mutex_unlock(&hw->blk[blk].masks.lock);
+ mutex_destroy(&hw->blk[blk].masks.lock);
+}
+
+/**
+ * ice_shutdown_all_prof_masks - clear all profile masks and destroy their locks
+ * @hw: pointer to the HW struct
+ *
+ * This should be called before unloading the driver
+ */
+static void ice_shutdown_all_prof_masks(struct ice_hw *hw)
+{
+ ice_shutdown_prof_masks(hw, ICE_BLK_RSS);
+ ice_shutdown_prof_masks(hw, ICE_BLK_FD);
+}
+
+/**
+ * ice_update_prof_masking - set registers according to masking
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ * @masks: masks
+ */
+static int
+ice_update_prof_masking(struct ice_hw *hw, enum ice_block blk, u16 prof_id,
+ u16 *masks)
+{
+ bool err = false;
+ u32 ena_mask = 0;
+ u16 idx;
+ u16 i;
+
+ /* Only support FD and RSS masking, otherwise nothing to be done */
+ if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+ return 0;
+
+ for (i = 0; i < hw->blk[blk].es.fvw; i++)
+ if (masks[i] && masks[i] != 0xFFFF) {
+ if (!ice_alloc_prof_mask(hw, blk, i, masks[i], &idx)) {
+ ena_mask |= BIT(idx);
+ } else {
+ /* not enough bitmaps */
+ err = true;
+ break;
+ }
+ }
+
+ if (err) {
+ /* free any bitmaps we have allocated */
+ for (i = 0; i < BITS_PER_BYTE * sizeof(ena_mask); i++)
+ if (ena_mask & BIT(i))
+ ice_free_prof_mask(hw, blk, i);
+
+ return -EIO;
+ }
+
+ /* enable the masks for this profile */
+ ice_write_prof_mask_enable_res(hw, blk, prof_id, ena_mask);
+
+ /* store enabled masks with profile so that they can be freed later */
+ hw->blk[blk].es.mask_ena[prof_id] = ena_mask;
+
+ return 0;
+}
+
+/**
+ * ice_write_es - write an extraction sequence to hardware
+ * @hw: pointer to the HW struct
+ * @blk: the block in which to write the extraction sequence
+ * @prof_id: the profile ID to write
+ * @fv: pointer to the extraction sequence to write - NULL to clear extraction
+ */
+static void
+ice_write_es(struct ice_hw *hw, enum ice_block blk, u8 prof_id,
+ struct ice_fv_word *fv)
+{
+ u16 off;
+
+ off = prof_id * hw->blk[blk].es.fvw;
+ if (!fv) {
+ memset(&hw->blk[blk].es.t[off], 0,
+ hw->blk[blk].es.fvw * sizeof(*fv));
+ hw->blk[blk].es.written[prof_id] = false;
+ } else {
+ memcpy(&hw->blk[blk].es.t[off], fv,
+ hw->blk[blk].es.fvw * sizeof(*fv));
+ }
+}
+
+/**
+ * ice_prof_dec_ref - decrement reference count for profile
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the profile ID
+ * @prof_id: the profile ID for which to decrement the reference count
+ */
+static int
+ice_prof_dec_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
+{
+ if (prof_id > hw->blk[blk].es.count)
+ return -EINVAL;
+
+ if (hw->blk[blk].es.ref_count[prof_id] > 0) {
+ if (!--hw->blk[blk].es.ref_count[prof_id]) {
+ ice_write_es(hw, blk, prof_id, NULL);
+ ice_free_prof_masks(hw, blk, prof_id);
+ return ice_free_prof_id(hw, blk, prof_id);
+ }
+ }
+
+ return 0;
+}
+
+/* Block / table section IDs */
+static const u32 ice_blk_sids[ICE_BLK_COUNT][ICE_SID_OFF_COUNT] = {
+ /* SWITCH */
+ { ICE_SID_XLT1_SW,
+ ICE_SID_XLT2_SW,
+ ICE_SID_PROFID_TCAM_SW,
+ ICE_SID_PROFID_REDIR_SW,
+ ICE_SID_FLD_VEC_SW
+ },
+
+ /* ACL */
+ { ICE_SID_XLT1_ACL,
+ ICE_SID_XLT2_ACL,
+ ICE_SID_PROFID_TCAM_ACL,
+ ICE_SID_PROFID_REDIR_ACL,
+ ICE_SID_FLD_VEC_ACL
+ },
+
+ /* FD */
+ { ICE_SID_XLT1_FD,
+ ICE_SID_XLT2_FD,
+ ICE_SID_PROFID_TCAM_FD,
+ ICE_SID_PROFID_REDIR_FD,
+ ICE_SID_FLD_VEC_FD
+ },
+
+ /* RSS */
+ { ICE_SID_XLT1_RSS,
+ ICE_SID_XLT2_RSS,
+ ICE_SID_PROFID_TCAM_RSS,
+ ICE_SID_PROFID_REDIR_RSS,
+ ICE_SID_FLD_VEC_RSS
+ },
+
+ /* PE */
+ { ICE_SID_XLT1_PE,
+ ICE_SID_XLT2_PE,
+ ICE_SID_PROFID_TCAM_PE,
+ ICE_SID_PROFID_REDIR_PE,
+ ICE_SID_FLD_VEC_PE
+ }
+};
+
+/**
+ * ice_init_sw_xlt1_db - init software XLT1 database from HW tables
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block to initialize
+ */
+static void ice_init_sw_xlt1_db(struct ice_hw *hw, enum ice_block blk)
+{
+ u16 pt;
+
+ for (pt = 0; pt < hw->blk[blk].xlt1.count; pt++) {
+ u8 ptg;
+
+ ptg = hw->blk[blk].xlt1.t[pt];
+ if (ptg != ICE_DEFAULT_PTG) {
+ ice_ptg_alloc_val(hw, blk, ptg);
+ ice_ptg_add_mv_ptype(hw, blk, pt, ptg);
+ }
+ }
+}
+
+/**
+ * ice_init_sw_xlt2_db - init software XLT2 database from HW tables
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block to initialize
+ */
+static void ice_init_sw_xlt2_db(struct ice_hw *hw, enum ice_block blk)
+{
+ u16 vsi;
+
+ for (vsi = 0; vsi < hw->blk[blk].xlt2.count; vsi++) {
+ u16 vsig;
+
+ vsig = hw->blk[blk].xlt2.t[vsi];
+ if (vsig) {
+ ice_vsig_alloc_val(hw, blk, vsig);
+ ice_vsig_add_mv_vsi(hw, blk, vsi, vsig);
+ /* no changes at this time, since this has been
+ * initialized from the original package
+ */
+ hw->blk[blk].xlt2.vsis[vsi].changed = 0;
+ }
+ }
+}
+
+/**
+ * ice_init_sw_db - init software database from HW tables
+ * @hw: pointer to the hardware structure
+ */
+static void ice_init_sw_db(struct ice_hw *hw)
+{
+ u16 i;
+
+ for (i = 0; i < ICE_BLK_COUNT; i++) {
+ ice_init_sw_xlt1_db(hw, (enum ice_block)i);
+ ice_init_sw_xlt2_db(hw, (enum ice_block)i);
+ }
+}
+
+/**
+ * ice_fill_tbl - Reads content of a single table type into database
+ * @hw: pointer to the hardware structure
+ * @block_id: Block ID of the table to copy
+ * @sid: Section ID of the table to copy
+ *
+ * Will attempt to read the entire content of a given table of a single block
+ * into the driver database. We assume that the destination buffer is always
+ * at least as large as the data contained in the package. If this condition
+ * is not met, there is most likely an error in the package contents.
+ */
+static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
+{
+ u32 dst_len, sect_len, offset = 0;
+ struct ice_prof_redir_section *pr;
+ struct ice_prof_id_section *pid;
+ struct ice_xlt1_section *xlt1;
+ struct ice_xlt2_section *xlt2;
+ struct ice_sw_fv_section *es;
+ struct ice_pkg_enum state;
+ u8 *src, *dst;
+ void *sect;
+
+ /* if the HW segment pointer is null then the first iteration of
+ * ice_pkg_enum_section() will fail. In this case the HW tables will
+ * not be filled and we return without error.
+ */
+ if (!hw->seg) {
+ ice_debug(hw, ICE_DBG_PKG, "hw->seg is NULL, tables are not filled\n");
+ return;
+ }
+
+ memset(&state, 0, sizeof(state));
+
+ sect = ice_pkg_enum_section(hw->seg, &state, sid);
+
+ while (sect) {
+ switch (sid) {
+ case ICE_SID_XLT1_SW:
+ case ICE_SID_XLT1_FD:
+ case ICE_SID_XLT1_RSS:
+ case ICE_SID_XLT1_ACL:
+ case ICE_SID_XLT1_PE:
+ xlt1 = sect;
+ src = xlt1->value;
+ sect_len = le16_to_cpu(xlt1->count) *
+ sizeof(*hw->blk[block_id].xlt1.t);
+ dst = hw->blk[block_id].xlt1.t;
+ dst_len = hw->blk[block_id].xlt1.count *
+ sizeof(*hw->blk[block_id].xlt1.t);
+ break;
+ case ICE_SID_XLT2_SW:
+ case ICE_SID_XLT2_FD:
+ case ICE_SID_XLT2_RSS:
+ case ICE_SID_XLT2_ACL:
+ case ICE_SID_XLT2_PE:
+ xlt2 = sect;
+ src = (__force u8 *)xlt2->value;
+ sect_len = le16_to_cpu(xlt2->count) *
+ sizeof(*hw->blk[block_id].xlt2.t);
+ dst = (u8 *)hw->blk[block_id].xlt2.t;
+ dst_len = hw->blk[block_id].xlt2.count *
+ sizeof(*hw->blk[block_id].xlt2.t);
+ break;
+ case ICE_SID_PROFID_TCAM_SW:
+ case ICE_SID_PROFID_TCAM_FD:
+ case ICE_SID_PROFID_TCAM_RSS:
+ case ICE_SID_PROFID_TCAM_ACL:
+ case ICE_SID_PROFID_TCAM_PE:
+ pid = sect;
+ src = (u8 *)pid->entry;
+ sect_len = le16_to_cpu(pid->count) *
+ sizeof(*hw->blk[block_id].prof.t);
+ dst = (u8 *)hw->blk[block_id].prof.t;
+ dst_len = hw->blk[block_id].prof.count *
+ sizeof(*hw->blk[block_id].prof.t);
+ break;
+ case ICE_SID_PROFID_REDIR_SW:
+ case ICE_SID_PROFID_REDIR_FD:
+ case ICE_SID_PROFID_REDIR_RSS:
+ case ICE_SID_PROFID_REDIR_ACL:
+ case ICE_SID_PROFID_REDIR_PE:
+ pr = sect;
+ src = pr->redir_value;
+ sect_len = le16_to_cpu(pr->count) *
+ sizeof(*hw->blk[block_id].prof_redir.t);
+ dst = hw->blk[block_id].prof_redir.t;
+ dst_len = hw->blk[block_id].prof_redir.count *
+ sizeof(*hw->blk[block_id].prof_redir.t);
+ break;
+ case ICE_SID_FLD_VEC_SW:
+ case ICE_SID_FLD_VEC_FD:
+ case ICE_SID_FLD_VEC_RSS:
+ case ICE_SID_FLD_VEC_ACL:
+ case ICE_SID_FLD_VEC_PE:
+ es = sect;
+ src = (u8 *)es->fv;
+ sect_len = (u32)(le16_to_cpu(es->count) *
+ hw->blk[block_id].es.fvw) *
+ sizeof(*hw->blk[block_id].es.t);
+ dst = (u8 *)hw->blk[block_id].es.t;
+ dst_len = (u32)(hw->blk[block_id].es.count *
+ hw->blk[block_id].es.fvw) *
+ sizeof(*hw->blk[block_id].es.t);
+ break;
+ default:
+ return;
+ }
+
+ /* if the section offset exceeds destination length, terminate
+ * table fill.
+ */
+ if (offset > dst_len)
+ return;
+
+ /* if the sum of section size and offset exceeds the destination size
+ * then we are out of bounds of the HW table size for that PF.
+ * Change the section length to fill only the remaining table space
+ * of that PF.
+ */
+ if ((offset + sect_len) > dst_len)
+ sect_len = dst_len - offset;
+
+ memcpy(dst + offset, src, sect_len);
+ offset += sect_len;
+ sect = ice_pkg_enum_section(NULL, &state, sid);
+ }
+}
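+
+/* Editorial sketch (illustration only): the enumeration pattern used above -
+ * prime the state with the segment, then pass NULL to continue - is how
+ * package sections are generally walked in this file.
+ */
+#if 0
+static void ice_example_walk_sections(struct ice_hw *hw, u32 sid)
+{
+ struct ice_pkg_enum state = {};
+ void *sect;
+
+ for (sect = ice_pkg_enum_section(hw->seg, &state, sid); sect;
+ sect = ice_pkg_enum_section(NULL, &state, sid))
+ ; /* process one section per iteration */
+}
+#endif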
+
+/**
+ * ice_fill_blk_tbls - Read package context for tables
+ * @hw: pointer to the hardware structure
+ *
+ * Reads the current package contents and populates the driver
+ * database with the data iteratively for all advanced feature
+ * blocks. Assumes that the HW tables have been allocated.
+ */
+void ice_fill_blk_tbls(struct ice_hw *hw)
+{
+ u8 i;
+
+ for (i = 0; i < ICE_BLK_COUNT; i++) {
+ enum ice_block blk_id = (enum ice_block)i;
+
+ ice_fill_tbl(hw, blk_id, hw->blk[blk_id].xlt1.sid);
+ ice_fill_tbl(hw, blk_id, hw->blk[blk_id].xlt2.sid);
+ ice_fill_tbl(hw, blk_id, hw->blk[blk_id].prof.sid);
+ ice_fill_tbl(hw, blk_id, hw->blk[blk_id].prof_redir.sid);
+ ice_fill_tbl(hw, blk_id, hw->blk[blk_id].es.sid);
+ }
+
+ ice_init_sw_db(hw);
+}
+
+/**
+ * ice_free_prof_map - free profile map
+ * @hw: pointer to the hardware structure
+ * @blk_idx: HW block index
+ */
+static void ice_free_prof_map(struct ice_hw *hw, u8 blk_idx)
+{
+ struct ice_es *es = &hw->blk[blk_idx].es;
+ struct ice_prof_map *del, *tmp;
+
+ mutex_lock(&es->prof_map_lock);
+ list_for_each_entry_safe(del, tmp, &es->prof_map, list) {
+ list_del(&del->list);
+ devm_kfree(ice_hw_to_dev(hw), del);
+ }
+ INIT_LIST_HEAD(&es->prof_map);
+ mutex_unlock(&es->prof_map_lock);
+}
+
+/**
+ * ice_free_flow_profs - free flow profile entries
+ * @hw: pointer to the hardware structure
+ * @blk_idx: HW block index
+ */
+static void ice_free_flow_profs(struct ice_hw *hw, u8 blk_idx)
+{
+ struct ice_flow_prof *p, *tmp;
+
+ mutex_lock(&hw->fl_profs_locks[blk_idx]);
+ list_for_each_entry_safe(p, tmp, &hw->fl_profs[blk_idx], l_entry) {
+ struct ice_flow_entry *e, *t;
+
+ list_for_each_entry_safe(e, t, &p->entries, l_entry)
+ ice_flow_rem_entry(hw, (enum ice_block)blk_idx,
+ ICE_FLOW_ENTRY_HNDL(e));
+
+ list_del(&p->l_entry);
+
+ mutex_destroy(&p->entries_lock);
+ devm_kfree(ice_hw_to_dev(hw), p);
+ }
+ mutex_unlock(&hw->fl_profs_locks[blk_idx]);
+
+ /* if the driver is in reset and the tables are being cleared,
+ * re-initialize the flow profile list heads
+ */
+ INIT_LIST_HEAD(&hw->fl_profs[blk_idx]);
+}
+
+/**
+ * ice_free_vsig_tbl - free complete VSIG table entries
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block on which to free the VSIG table entries
+ */
+static void ice_free_vsig_tbl(struct ice_hw *hw, enum ice_block blk)
+{
+ u16 i;
+
+ if (!hw->blk[blk].xlt2.vsig_tbl)
+ return;
+
+ for (i = 1; i < ICE_MAX_VSIGS; i++)
+ if (hw->blk[blk].xlt2.vsig_tbl[i].in_use)
+ ice_vsig_free(hw, blk, i);
+}
+
+/**
+ * ice_free_hw_tbls - free hardware table memory
+ * @hw: pointer to the hardware structure
+ */
+void ice_free_hw_tbls(struct ice_hw *hw)
+{
+ struct ice_rss_cfg *r, *rt;
+ u8 i;
+
+ for (i = 0; i < ICE_BLK_COUNT; i++) {
+ if (hw->blk[i].is_list_init) {
+ struct ice_es *es = &hw->blk[i].es;
+
+ ice_free_prof_map(hw, i);
+ mutex_destroy(&es->prof_map_lock);
+
+ ice_free_flow_profs(hw, i);
+ mutex_destroy(&hw->fl_profs_locks[i]);
+
+ hw->blk[i].is_list_init = false;
+ }
+ ice_free_vsig_tbl(hw, (enum ice_block)i);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptypes);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptg_tbl);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.t);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.t);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.vsig_tbl);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.vsis);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].prof.t);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].prof_redir.t);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.t);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.ref_count);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.written);
+ devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.mask_ena);
+ }
+
+ list_for_each_entry_safe(r, rt, &hw->rss_list_head, l_entry) {
+ list_del(&r->l_entry);
+ devm_kfree(ice_hw_to_dev(hw), r);
+ }
+ mutex_destroy(&hw->rss_locks);
+ ice_shutdown_all_prof_masks(hw);
+ memset(hw->blk, 0, sizeof(hw->blk));
+}
+
+/**
+ * ice_init_flow_profs - init flow profile locks and list heads
+ * @hw: pointer to the hardware structure
+ * @blk_idx: HW block index
+ */
+static void ice_init_flow_profs(struct ice_hw *hw, u8 blk_idx)
+{
+ mutex_init(&hw->fl_profs_locks[blk_idx]);
+ INIT_LIST_HEAD(&hw->fl_profs[blk_idx]);
+}
+
+/**
+ * ice_clear_hw_tbls - clear HW tables and flow profiles
+ * @hw: pointer to the hardware structure
+ */
+void ice_clear_hw_tbls(struct ice_hw *hw)
+{
+ u8 i;
+
+ for (i = 0; i < ICE_BLK_COUNT; i++) {
+ struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir;
+ struct ice_prof_tcam *prof = &hw->blk[i].prof;
+ struct ice_xlt1 *xlt1 = &hw->blk[i].xlt1;
+ struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2;
+ struct ice_es *es = &hw->blk[i].es;
+
+ if (hw->blk[i].is_list_init) {
+ ice_free_prof_map(hw, i);
+ ice_free_flow_profs(hw, i);
+ }
+
+ ice_free_vsig_tbl(hw, (enum ice_block)i);
+
+ memset(xlt1->ptypes, 0, xlt1->count * sizeof(*xlt1->ptypes));
+ memset(xlt1->ptg_tbl, 0,
+ ICE_MAX_PTGS * sizeof(*xlt1->ptg_tbl));
+ memset(xlt1->t, 0, xlt1->count * sizeof(*xlt1->t));
+
+ memset(xlt2->vsis, 0, xlt2->count * sizeof(*xlt2->vsis));
+ memset(xlt2->vsig_tbl, 0,
+ xlt2->count * sizeof(*xlt2->vsig_tbl));
+ memset(xlt2->t, 0, xlt2->count * sizeof(*xlt2->t));
+
+ memset(prof->t, 0, prof->count * sizeof(*prof->t));
+ memset(prof_redir->t, 0,
+ prof_redir->count * sizeof(*prof_redir->t));
+
+ memset(es->t, 0, es->count * sizeof(*es->t) * es->fvw);
+ memset(es->ref_count, 0, es->count * sizeof(*es->ref_count));
+ memset(es->written, 0, es->count * sizeof(*es->written));
+ memset(es->mask_ena, 0, es->count * sizeof(*es->mask_ena));
+ }
+}
+
+/**
+ * ice_init_hw_tbls - init hardware table memory
+ * @hw: pointer to the hardware structure
+ */
+int ice_init_hw_tbls(struct ice_hw *hw)
+{
+ u8 i;
+
+ mutex_init(&hw->rss_locks);
+ INIT_LIST_HEAD(&hw->rss_list_head);
+ ice_init_all_prof_masks(hw);
+ for (i = 0; i < ICE_BLK_COUNT; i++) {
+ struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir;
+ struct ice_prof_tcam *prof = &hw->blk[i].prof;
+ struct ice_xlt1 *xlt1 = &hw->blk[i].xlt1;
+ struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2;
+ struct ice_es *es = &hw->blk[i].es;
+ u16 j;
+
+ if (hw->blk[i].is_list_init)
+ continue;
+
+ ice_init_flow_profs(hw, i);
+ mutex_init(&es->prof_map_lock);
+ INIT_LIST_HEAD(&es->prof_map);
+ hw->blk[i].is_list_init = true;
+
+ hw->blk[i].overwrite = blk_sizes[i].overwrite;
+ es->reverse = blk_sizes[i].reverse;
+
+ xlt1->sid = ice_blk_sids[i][ICE_SID_XLT1_OFF];
+ xlt1->count = blk_sizes[i].xlt1;
+
+ xlt1->ptypes = devm_kcalloc(ice_hw_to_dev(hw), xlt1->count,
+ sizeof(*xlt1->ptypes), GFP_KERNEL);
+
+ if (!xlt1->ptypes)
+ goto err;
+
+ xlt1->ptg_tbl = devm_kcalloc(ice_hw_to_dev(hw), ICE_MAX_PTGS,
+ sizeof(*xlt1->ptg_tbl),
+ GFP_KERNEL);
+
+ if (!xlt1->ptg_tbl)
+ goto err;
+
+ xlt1->t = devm_kcalloc(ice_hw_to_dev(hw), xlt1->count,
+ sizeof(*xlt1->t), GFP_KERNEL);
+ if (!xlt1->t)
+ goto err;
+
+ xlt2->sid = ice_blk_sids[i][ICE_SID_XLT2_OFF];
+ xlt2->count = blk_sizes[i].xlt2;
+
+ xlt2->vsis = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+ sizeof(*xlt2->vsis), GFP_KERNEL);
+
+ if (!xlt2->vsis)
+ goto err;
+
+ xlt2->vsig_tbl = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+ sizeof(*xlt2->vsig_tbl),
+ GFP_KERNEL);
+ if (!xlt2->vsig_tbl)
+ goto err;
+
+ for (j = 0; j < xlt2->count; j++)
+ INIT_LIST_HEAD(&xlt2->vsig_tbl[j].prop_lst);
+
+ xlt2->t = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+ sizeof(*xlt2->t), GFP_KERNEL);
+ if (!xlt2->t)
+ goto err;
+
+ prof->sid = ice_blk_sids[i][ICE_SID_PR_OFF];
+ prof->count = blk_sizes[i].prof_tcam;
+ prof->max_prof_id = blk_sizes[i].prof_id;
+ prof->cdid_bits = blk_sizes[i].prof_cdid_bits;
+ prof->t = devm_kcalloc(ice_hw_to_dev(hw), prof->count,
+ sizeof(*prof->t), GFP_KERNEL);
+
+ if (!prof->t)
+ goto err;
+
+ prof_redir->sid = ice_blk_sids[i][ICE_SID_PR_REDIR_OFF];
+ prof_redir->count = blk_sizes[i].prof_redir;
+ prof_redir->t = devm_kcalloc(ice_hw_to_dev(hw),
+ prof_redir->count,
+ sizeof(*prof_redir->t),
+ GFP_KERNEL);
+
+ if (!prof_redir->t)
+ goto err;
+
+ es->sid = ice_blk_sids[i][ICE_SID_ES_OFF];
+ es->count = blk_sizes[i].es;
+ es->fvw = blk_sizes[i].fvw;
+ es->t = devm_kcalloc(ice_hw_to_dev(hw),
+ (u32)(es->count * es->fvw),
+ sizeof(*es->t), GFP_KERNEL);
+ if (!es->t)
+ goto err;
+
+ es->ref_count = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+ sizeof(*es->ref_count),
+ GFP_KERNEL);
+ if (!es->ref_count)
+ goto err;
+
+ es->written = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+ sizeof(*es->written), GFP_KERNEL);
+ if (!es->written)
+ goto err;
+
+ es->mask_ena = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+ sizeof(*es->mask_ena), GFP_KERNEL);
+ if (!es->mask_ena)
+ goto err;
+ }
+ return 0;
+
+err:
+ ice_free_hw_tbls(hw);
+ return -ENOMEM;
+}
+
+/**
+ * ice_prof_gen_key - generate profile ID key
+ * @hw: pointer to the HW struct
+ * @blk: the block in which to write profile ID to
+ * @ptg: packet type group (PTG) portion of key
+ * @vsig: VSIG portion of key
+ * @cdid: CDID portion of key
+ * @flags: flag portion of key
+ * @vl_msk: valid mask
+ * @dc_msk: don't care mask
+ * @nm_msk: never match mask
+ * @key: output of profile ID key
+ */
+static int
+ice_prof_gen_key(struct ice_hw *hw, enum ice_block blk, u8 ptg, u16 vsig,
+ u8 cdid, u16 flags, u8 vl_msk[ICE_TCAM_KEY_VAL_SZ],
+ u8 dc_msk[ICE_TCAM_KEY_VAL_SZ], u8 nm_msk[ICE_TCAM_KEY_VAL_SZ],
+ u8 key[ICE_TCAM_KEY_SZ])
+{
+ struct ice_prof_id_key inkey;
+
+ inkey.xlt1 = ptg;
+ inkey.xlt2_cdid = cpu_to_le16(vsig);
+ inkey.flags = cpu_to_le16(flags);
+
+ switch (hw->blk[blk].prof.cdid_bits) {
+ case 0:
+ break;
+ case 2:
+#define ICE_CD_2_M 0xC000U
+#define ICE_CD_2_S 14
+ inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_2_M);
+ inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_2_S);
+ break;
+ case 4:
+#define ICE_CD_4_M 0xF000U
+#define ICE_CD_4_S 12
+ inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_4_M);
+ inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_4_S);
+ break;
+ case 8:
+#define ICE_CD_8_M 0xFF00U
+#define ICE_CD_8_S 16
+ inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_8_M);
+ inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_8_S);
+ break;
+ default:
+ ice_debug(hw, ICE_DBG_PKG, "Error in profile config\n");
+ break;
+ }
+
+ return ice_set_key(key, ICE_TCAM_KEY_SZ, (u8 *)&inkey, vl_msk, dc_msk,
+ nm_msk, 0, ICE_TCAM_KEY_SZ / 2);
+}
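+
+/* Layout note for the key built above: the 16-bit xlt2_cdid field carries
+ * the VSIG in its low bits and, when a block uses CDIDs, a one-hot CDID in
+ * its top 2, 4 or 8 bits (the ICE_CD_*_M masks). No block in blk_sizes
+ * currently sets prof_cdid_bits, so the switch normally takes the case 0
+ * path.
+ */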
+
+/**
+ * ice_tcam_write_entry - write TCAM entry
+ * @hw: pointer to the HW struct
+ * @blk: the block in which to write profile ID to
+ * @idx: the entry index to write to
+ * @prof_id: profile ID
+ * @ptg: packet type group (PTG) portion of key
+ * @vsig: VSIG portion of key
+ * @cdid: CDID portion of key
+ * @flags: flag portion of key
+ * @vl_msk: valid mask
+ * @dc_msk: don't care mask
+ * @nm_msk: never match mask
+ */
+static int
+ice_tcam_write_entry(struct ice_hw *hw, enum ice_block blk, u16 idx,
+ u8 prof_id, u8 ptg, u16 vsig, u8 cdid, u16 flags,
+ u8 vl_msk[ICE_TCAM_KEY_VAL_SZ],
+ u8 dc_msk[ICE_TCAM_KEY_VAL_SZ],
+ u8 nm_msk[ICE_TCAM_KEY_VAL_SZ])
+{
+ int status;
+
+ status = ice_prof_gen_key(hw, blk, ptg, vsig, cdid, flags, vl_msk,
+ dc_msk, nm_msk, hw->blk[blk].prof.t[idx].key);
+ if (!status) {
+ hw->blk[blk].prof.t[idx].addr = cpu_to_le16(idx);
+ hw->blk[blk].prof.t[idx].prof_id = prof_id;
+ }
+
+ return status;
+}
+
+/**
+ * ice_vsig_get_ref - returns the number of VSIs that belong to a VSIG
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: VSIG to query
+ * @refs: pointer to variable to receive the reference count
+ */
+static int
+ice_vsig_get_ref(struct ice_hw *hw, enum ice_block blk, u16 vsig, u16 *refs)
+{
+ u16 idx = vsig & ICE_VSIG_IDX_M;
+ struct ice_vsig_vsi *ptr;
+
+ *refs = 0;
+
+ if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use)
+ return -ENOENT;
+
+ ptr = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+ while (ptr) {
+ (*refs)++;
+ ptr = ptr->next_vsi;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_has_prof_vsig - check to see if VSIG has a specific profile
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: VSIG to check against
+ * @hdl: profile handle
+ */
+static bool
+ice_has_prof_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl)
+{
+ u16 idx = vsig & ICE_VSIG_IDX_M;
+ struct ice_vsig_prof *ent;
+
+ list_for_each_entry(ent, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+ list)
+ if (ent->profile_cookie == hdl)
+ return true;
+
+ ice_debug(hw, ICE_DBG_INIT, "Characteristic list for VSI group %d not found.\n",
+ vsig);
+ return false;
+}
+
+/**
+ * ice_prof_bld_es - build profile ID extraction sequence changes
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static int
+ice_prof_bld_es(struct ice_hw *hw, enum ice_block blk,
+ struct ice_buf_build *bld, struct list_head *chgs)
+{
+ u16 vec_size = hw->blk[blk].es.fvw * sizeof(struct ice_fv_word);
+ struct ice_chs_chg *tmp;
+
+ list_for_each_entry(tmp, chgs, list_entry)
+ if (tmp->type == ICE_PTG_ES_ADD && tmp->add_prof) {
+ u16 off = tmp->prof_id * hw->blk[blk].es.fvw;
+ struct ice_pkg_es *p;
+ u32 id;
+
+ id = ice_sect_id(blk, ICE_VEC_TBL);
+ p = ice_pkg_buf_alloc_section(bld, id,
+ struct_size(p, es, 1) +
+ vec_size -
+ sizeof(p->es[0]));
+
+ if (!p)
+ return -ENOSPC;
+
+ p->count = cpu_to_le16(1);
+ p->offset = cpu_to_le16(tmp->prof_id);
+
+ memcpy(p->es, &hw->blk[blk].es.t[off], vec_size);
+ }
+
+ return 0;
+}
+
+/**
+ * ice_prof_bld_tcam - build profile ID TCAM changes
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static int
+ice_prof_bld_tcam(struct ice_hw *hw, enum ice_block blk,
+ struct ice_buf_build *bld, struct list_head *chgs)
+{
+ struct ice_chs_chg *tmp;
+
+ list_for_each_entry(tmp, chgs, list_entry)
+ if (tmp->type == ICE_TCAM_ADD && tmp->add_tcam_idx) {
+ struct ice_prof_id_section *p;
+ u32 id;
+
+ id = ice_sect_id(blk, ICE_PROF_TCAM);
+ p = ice_pkg_buf_alloc_section(bld, id,
+ struct_size(p, entry, 1));
+
+ if (!p)
+ return -ENOSPC;
+
+ p->count = cpu_to_le16(1);
+ p->entry[0].addr = cpu_to_le16(tmp->tcam_idx);
+ p->entry[0].prof_id = tmp->prof_id;
+
+ memcpy(p->entry[0].key,
+ &hw->blk[blk].prof.t[tmp->tcam_idx].key,
+ sizeof(hw->blk[blk].prof.t->key));
+ }
+
+ return 0;
+}
+
+/**
+ * ice_prof_bld_xlt1 - build XLT1 changes
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static int
+ice_prof_bld_xlt1(enum ice_block blk, struct ice_buf_build *bld,
+ struct list_head *chgs)
+{
+ struct ice_chs_chg *tmp;
+
+ list_for_each_entry(tmp, chgs, list_entry)
+ if (tmp->type == ICE_PTG_ES_ADD && tmp->add_ptg) {
+ struct ice_xlt1_section *p;
+ u32 id;
+
+ id = ice_sect_id(blk, ICE_XLT1);
+ p = ice_pkg_buf_alloc_section(bld, id,
+ struct_size(p, value, 1));
+
+ if (!p)
+ return -ENOSPC;
+
+ p->count = cpu_to_le16(1);
+ p->offset = cpu_to_le16(tmp->ptype);
+ p->value[0] = tmp->ptg;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_prof_bld_xlt2 - build XLT2 changes
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static int
+ice_prof_bld_xlt2(enum ice_block blk, struct ice_buf_build *bld,
+ struct list_head *chgs)
+{
+ struct ice_chs_chg *tmp;
+
+ list_for_each_entry(tmp, chgs, list_entry) {
+ struct ice_xlt2_section *p;
+ u32 id;
+
+ switch (tmp->type) {
+ case ICE_VSIG_ADD:
+ case ICE_VSI_MOVE:
+ case ICE_VSIG_REM:
+ id = ice_sect_id(blk, ICE_XLT2);
+ p = ice_pkg_buf_alloc_section(bld, id,
+ struct_size(p, value, 1));
+
+ if (!p)
+ return -ENOSPC;
+
+ p->count = cpu_to_le16(1);
+ p->offset = cpu_to_le16(tmp->vsi);
+ p->value[0] = cpu_to_le16(tmp->vsig);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_upd_prof_hw - update hardware using the change list
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @chgs: the list of changes to make in hardware
+ */
+static int
+ice_upd_prof_hw(struct ice_hw *hw, enum ice_block blk,
+ struct list_head *chgs)
+{
+ struct ice_buf_build *b;
+ struct ice_chs_chg *tmp;
+ u16 pkg_sects;
+ u16 xlt1 = 0;
+ u16 xlt2 = 0;
+ u16 tcam = 0;
+ u16 es = 0;
+ int status;
+ u16 sects;
+
+ /* count number of sections we need */
+ list_for_each_entry(tmp, chgs, list_entry) {
+ switch (tmp->type) {
+ case ICE_PTG_ES_ADD:
+ if (tmp->add_ptg)
+ xlt1++;
+ if (tmp->add_prof)
+ es++;
+ break;
+ case ICE_TCAM_ADD:
+ tcam++;
+ break;
+ case ICE_VSIG_ADD:
+ case ICE_VSI_MOVE:
+ case ICE_VSIG_REM:
+ xlt2++;
+ break;
+ default:
+ break;
+ }
+ }
+ sects = xlt1 + xlt2 + tcam + es;
+
+ if (!sects)
+ return 0;
+
+ /* Build update package buffer */
+ b = ice_pkg_buf_alloc(hw);
+ if (!b)
+ return -ENOMEM;
+
+ status = ice_pkg_buf_reserve_section(b, sects);
+ if (status)
+ goto error_tmp;
+
+ /* Preserve order of table update: ES, TCAM, PTG, VSIG */
+ if (es) {
+ status = ice_prof_bld_es(hw, blk, b, chgs);
+ if (status)
+ goto error_tmp;
+ }
+
+ if (tcam) {
+ status = ice_prof_bld_tcam(hw, blk, b, chgs);
+ if (status)
+ goto error_tmp;
+ }
+
+ if (xlt1) {
+ status = ice_prof_bld_xlt1(blk, b, chgs);
+ if (status)
+ goto error_tmp;
+ }
+
+ if (xlt2) {
+ status = ice_prof_bld_xlt2(blk, b, chgs);
+ if (status)
+ goto error_tmp;
+ }
+
+ /* After the package buffer build, check that the section count in the
+ * buffer is non-zero and matches the number of sections detected for
+ * the package update.
+ */
+ pkg_sects = ice_pkg_buf_get_active_sections(b);
+ if (!pkg_sects || pkg_sects != sects) {
+ status = -EINVAL;
+ goto error_tmp;
+ }
+
+ /* update package */
+ status = ice_update_pkg(hw, ice_pkg_buf(b), 1);
+ if (status == -EIO)
+ ice_debug(hw, ICE_DBG_INIT, "Unable to update HW profile\n");
+
+error_tmp:
+ ice_pkg_buf_free(hw, b);
+ return status;
+}
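+
+/* Usage sketch (illustrative, not part of the driver): the usual pattern, as
+ * in ice_rem_flow_all() below, is to accumulate struct ice_chs_chg records on
+ * a local list, commit them in one call, then free the records:
+ *
+ *	struct ice_chs_chg *del, *tmp;
+ *	struct list_head chg;
+ *
+ *	INIT_LIST_HEAD(&chg);
+ *	(add ICE_PTG_ES_ADD / ICE_TCAM_ADD / ICE_VSIG_* records to &chg)
+ *	status = ice_upd_prof_hw(hw, blk, &chg);
+ *	list_for_each_entry_safe(del, tmp, &chg, list_entry) {
+ *		list_del(&del->list_entry);
+ *		devm_kfree(ice_hw_to_dev(hw), del);
+ *	}
+ */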
+
+/**
+ * ice_update_fd_mask - set Flow Director Field Vector mask for a profile
+ * @hw: pointer to the HW struct
+ * @prof_id: profile ID
+ * @mask_sel: mask select
+ *
+ * This function enables any of the masks selected by the mask select
+ * parameter for the specified profile.
+ */
+static void ice_update_fd_mask(struct ice_hw *hw, u16 prof_id, u32 mask_sel)
+{
+ wr32(hw, GLQF_FDMASK_SEL(prof_id), mask_sel);
+
+ ice_debug(hw, ICE_DBG_INIT, "fd mask(%d): %x = %x\n", prof_id,
+ GLQF_FDMASK_SEL(prof_id), mask_sel);
+}
+
+struct ice_fd_src_dst_pair {
+ u8 prot_id;
+ u8 count;
+ u16 off;
+};
+
+static const struct ice_fd_src_dst_pair ice_fd_pairs[] = {
+ /* These are defined in pairs */
+ { ICE_PROT_IPV4_OF_OR_S, 2, 12 },
+ { ICE_PROT_IPV4_OF_OR_S, 2, 16 },
+
+ { ICE_PROT_IPV4_IL, 2, 12 },
+ { ICE_PROT_IPV4_IL, 2, 16 },
+
+ { ICE_PROT_IPV6_OF_OR_S, 8, 8 },
+ { ICE_PROT_IPV6_OF_OR_S, 8, 24 },
+
+ { ICE_PROT_IPV6_IL, 8, 8 },
+ { ICE_PROT_IPV6_IL, 8, 24 },
+
+ { ICE_PROT_TCP_IL, 1, 0 },
+ { ICE_PROT_TCP_IL, 1, 2 },
+
+ { ICE_PROT_UDP_OF, 1, 0 },
+ { ICE_PROT_UDP_OF, 1, 2 },
+
+ { ICE_PROT_UDP_IL_OR_S, 1, 0 },
+ { ICE_PROT_UDP_IL_OR_S, 1, 2 },
+
+ { ICE_PROT_SCTP_IL, 1, 0 },
+ { ICE_PROT_SCTP_IL, 1, 2 }
+};
+
+#define ICE_FD_SRC_DST_PAIR_COUNT ARRAY_SIZE(ice_fd_pairs)
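+
+/* Illustrative helper (a sketch, not used by the driver): because the table
+ * above is defined in src/dst pairs, the partner of entry j is the other
+ * member of its even/odd pair; ice_update_fd_swap() computes this inline as
+ * j + ((j % 2) ? -1 : 1). Written out as a helper:
+ */
+static inline u8 ice_fd_pair_partner(u8 j)
+{
+	/* even j (a source field) pairs with j + 1 (its destination);
+	 * odd j pairs with j - 1
+	 */
+	return j + ((j % 2) ? -1 : 1);
+}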
+
+/**
+ * ice_update_fd_swap - set register appropriately for a FD FV extraction
+ * @hw: pointer to the HW struct
+ * @prof_id: profile ID
+ * @es: extraction sequence (length of array is determined by the block)
+ */
+static int
+ice_update_fd_swap(struct ice_hw *hw, u16 prof_id, struct ice_fv_word *es)
+{
+ DECLARE_BITMAP(pair_list, ICE_FD_SRC_DST_PAIR_COUNT);
+ u8 pair_start[ICE_FD_SRC_DST_PAIR_COUNT] = { 0 };
+#define ICE_FD_FV_NOT_FOUND (-2)
+ s8 first_free = ICE_FD_FV_NOT_FOUND;
+ u8 used[ICE_MAX_FV_WORDS] = { 0 };
+ s8 orig_free, si;
+ u32 mask_sel = 0;
+ u8 i, j, k;
+
+ bitmap_zero(pair_list, ICE_FD_SRC_DST_PAIR_COUNT);
+
+ /* This code assumes that the Flow Director field vectors are assigned
+ * from the end of the FV indexes working towards the zero index, that
+ * only complete fields will be included and will be consecutive, and
+ * that there are no gaps between valid indexes.
+ */
+
+ /* Determine swap fields present */
+ for (i = 0; i < hw->blk[ICE_BLK_FD].es.fvw; i++) {
+ /* Find the first free entry, assuming right to left population.
+ * This is where we can start adding additional pairs if needed.
+ */
+ if (first_free == ICE_FD_FV_NOT_FOUND && es[i].prot_id !=
+ ICE_PROT_INVALID)
+ first_free = i - 1;
+
+ for (j = 0; j < ICE_FD_SRC_DST_PAIR_COUNT; j++)
+ if (es[i].prot_id == ice_fd_pairs[j].prot_id &&
+ es[i].off == ice_fd_pairs[j].off) {
+ __set_bit(j, pair_list);
+ pair_start[j] = i;
+ }
+ }
+
+ orig_free = first_free;
+
+ /* determine missing swap fields that need to be added */
+ for (i = 0; i < ICE_FD_SRC_DST_PAIR_COUNT; i += 2) {
+ u8 bit1 = test_bit(i + 1, pair_list);
+ u8 bit0 = test_bit(i, pair_list);
+
+ if (bit0 ^ bit1) {
+ u8 index;
+
+ /* add the appropriate 'paired' entry */
+ if (!bit0)
+ index = i;
+ else
+ index = i + 1;
+
+ /* check for room */
+ if (first_free + 1 < (s8)ice_fd_pairs[index].count)
+ return -ENOSPC;
+
+ /* place in extraction sequence */
+ for (k = 0; k < ice_fd_pairs[index].count; k++) {
+ es[first_free - k].prot_id =
+ ice_fd_pairs[index].prot_id;
+ es[first_free - k].off =
+ ice_fd_pairs[index].off + (k * 2);
+
+ if (k > first_free)
+ return -EIO;
+
+ /* keep track of non-relevant fields */
+ mask_sel |= BIT(first_free - k);
+ }
+
+ pair_start[index] = first_free;
+ first_free -= ice_fd_pairs[index].count;
+ }
+ }
+
+ /* fill in the swap array */
+ si = hw->blk[ICE_BLK_FD].es.fvw - 1;
+ while (si >= 0) {
+ u8 indexes_used = 1;
+
+ /* assume a flat (identity) mapping at this index */
+#define ICE_SWAP_VALID 0x80
+ used[si] = si | ICE_SWAP_VALID;
+
+ if (orig_free == ICE_FD_FV_NOT_FOUND || si <= orig_free) {
+ si -= indexes_used;
+ continue;
+ }
+
+ /* check for a swap location */
+ for (j = 0; j < ICE_FD_SRC_DST_PAIR_COUNT; j++)
+ if (es[si].prot_id == ice_fd_pairs[j].prot_id &&
+ es[si].off == ice_fd_pairs[j].off) {
+ u8 idx;
+
+ /* determine the appropriate matching field */
+ idx = j + ((j % 2) ? -1 : 1);
+
+ indexes_used = ice_fd_pairs[idx].count;
+ for (k = 0; k < indexes_used; k++) {
+ used[si - k] = (pair_start[idx] - k) |
+ ICE_SWAP_VALID;
+ }
+
+ break;
+ }
+
+ si -= indexes_used;
+ }
+
+ /* for each set of 4 swap and 4 inset indexes, write the appropriate
+ * register
+ */
+ for (j = 0; j < hw->blk[ICE_BLK_FD].es.fvw / 4; j++) {
+ u32 raw_swap = 0;
+ u32 raw_in = 0;
+
+ for (k = 0; k < 4; k++) {
+ u8 idx;
+
+ idx = (j * 4) + k;
+ if (used[idx] && !(mask_sel & BIT(idx))) {
+ raw_swap |= used[idx] << (k * BITS_PER_BYTE);
+#define ICE_INSET_DFLT 0x9f
+ raw_in |= ICE_INSET_DFLT << (k * BITS_PER_BYTE);
+ }
+ }
+
+ /* write the appropriate swap register set */
+ wr32(hw, GLQF_FDSWAP(prof_id, j), raw_swap);
+
+ ice_debug(hw, ICE_DBG_INIT, "swap wr(%d, %d): %x = %08x\n",
+ prof_id, j, GLQF_FDSWAP(prof_id, j), raw_swap);
+
+ /* write the appropriate inset register set */
+ wr32(hw, GLQF_FDINSET(prof_id, j), raw_in);
+
+ ice_debug(hw, ICE_DBG_INIT, "inset wr(%d, %d): %x = %08x\n",
+ prof_id, j, GLQF_FDINSET(prof_id, j), raw_in);
+ }
+
+ /* initially clear the mask select for this profile */
+ ice_update_fd_mask(hw, prof_id, 0);
+
+ return 0;
+}
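+
+/* Register layout sketch (informational): each GLQF_FDSWAP/GLQF_FDINSET
+ * register covers four field-vector indexes, one byte per index, with bit 7
+ * (ICE_SWAP_VALID) marking the byte valid. Assuming a 48-word field vector
+ * (ICE_MAX_FV_WORDS), that is 12 register pairs per profile:
+ *
+ *	register j, byte k  <->  field-vector index (j * 4) + k
+ *	raw_swap byte       =    used[(j * 4) + k] (swap target | valid bit)
+ *	raw_in byte         =    ICE_INSET_DFLT (0x9f)
+ */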
+
+/* The entries here need to match the order of enum ice_ptype_attrib_type */
+static const struct ice_ptype_attrib_info ice_ptype_attributes[] = {
+ { ICE_GTP_PDU_EH, ICE_GTP_PDU_FLAG_MASK },
+ { ICE_GTP_SESSION, ICE_GTP_FLAGS_MASK },
+ { ICE_GTP_DOWNLINK, ICE_GTP_FLAGS_MASK },
+ { ICE_GTP_UPLINK, ICE_GTP_FLAGS_MASK },
+};
+
+/**
+ * ice_get_ptype_attrib_info - get PTYPE attribute information
+ * @type: attribute type
+ * @info: pointer to variable to receive the attribute information
+ */
+static void
+ice_get_ptype_attrib_info(enum ice_ptype_attrib_type type,
+ struct ice_ptype_attrib_info *info)
+{
+ *info = ice_ptype_attributes[type];
+}
+
+/**
+ * ice_add_prof_attrib - add any PTG with attributes to profile
+ * @prof: pointer to the profile to which PTG entries will be added
+ * @ptg: PTG to be added
+ * @ptype: PTYPE that needs to be looked up
+ * @attr: array of attributes that will be considered
+ * @attr_cnt: number of elements in the attribute array
+ */
+static int
+ice_add_prof_attrib(struct ice_prof_map *prof, u8 ptg, u16 ptype,
+ const struct ice_ptype_attributes *attr, u16 attr_cnt)
+{
+ bool found = false;
+ u16 i;
+
+ for (i = 0; i < attr_cnt; i++)
+ if (attr[i].ptype == ptype) {
+ found = true;
+
+ prof->ptg[prof->ptg_cnt] = ptg;
+ ice_get_ptype_attrib_info(attr[i].attrib,
+ &prof->attr[prof->ptg_cnt]);
+
+ if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE)
+ return -ENOSPC;
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ return 0;
+}
+
+/**
+ * ice_add_prof - add profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ * @ptypes: array of bitmaps indicating ptypes (ICE_FLOW_PTYPE_MAX bits)
+ * @attr: array of attributes
+ * @attr_cnt: number of elements in attr array
+ * @es: extraction sequence (length of array is determined by the block)
+ * @masks: mask for extraction sequence
+ *
+ * This function registers a profile, which matches a set of PTYPES with a
+ * particular extraction sequence. While the hardware profile is allocated,
+ * it will not be written until the first call to ice_add_flow that specifies
+ * the ID value used here.
+ */
+int
+ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
+ const struct ice_ptype_attributes *attr, u16 attr_cnt,
+ struct ice_fv_word *es, u16 *masks)
+{
+ u32 bytes = DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE);
+ DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT);
+ struct ice_prof_map *prof;
+ u8 byte = 0;
+ u8 prof_id;
+ int status;
+
+ bitmap_zero(ptgs_used, ICE_XLT1_CNT);
+
+ mutex_lock(&hw->blk[blk].es.prof_map_lock);
+
+ /* search for existing profile */
+ status = ice_find_prof_id_with_mask(hw, blk, es, masks, &prof_id);
+ if (status) {
+ /* allocate profile ID */
+ status = ice_alloc_prof_id(hw, blk, &prof_id);
+ if (status)
+ goto err_ice_add_prof;
+ if (blk == ICE_BLK_FD) {
+ /* For Flow Director block, the extraction sequence may
+ * need to be altered in the case where there are paired
+ * fields that have no match. This is necessary because
+ * for Flow Director, src and dest fields need to be paired
+ * for filter programming and these values are swapped
+ * during Tx.
+ */
+ status = ice_update_fd_swap(hw, prof_id, es);
+ if (status)
+ goto err_ice_add_prof;
+ }
+ status = ice_update_prof_masking(hw, blk, prof_id, masks);
+ if (status)
+ goto err_ice_add_prof;
+
+ /* and write new es */
+ ice_write_es(hw, blk, prof_id, es);
+ }
+
+ ice_prof_inc_ref(hw, blk, prof_id);
+
+ /* add profile info */
+ prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*prof), GFP_KERNEL);
+ if (!prof) {
+ status = -ENOMEM;
+ goto err_ice_add_prof;
+ }
+
+ prof->profile_cookie = id;
+ prof->prof_id = prof_id;
+ prof->ptg_cnt = 0;
+ prof->context = 0;
+
+ /* build list of ptgs */
+ while (bytes && prof->ptg_cnt < ICE_MAX_PTG_PER_PROFILE) {
+ u8 bit;
+
+ if (!ptypes[byte]) {
+ bytes--;
+ byte++;
+ continue;
+ }
+
+ /* Examine 8 bits per byte */
+ for_each_set_bit(bit, (unsigned long *)&ptypes[byte],
+ BITS_PER_BYTE) {
+ u16 ptype;
+ u8 ptg;
+
+ ptype = byte * BITS_PER_BYTE + bit;
+
+ /* The package should place all ptypes in a non-zero
+ * PTG, so the following call should never fail.
+ */
+ if (ice_ptg_find_ptype(hw, blk, ptype, &ptg))
+ continue;
+
+ /* If PTG is already added, skip and continue */
+ if (test_bit(ptg, ptgs_used))
+ continue;
+
+ __set_bit(ptg, ptgs_used);
+ /* Check to see if there are any attributes for
+ * this PTYPE, and add them if found.
+ */
+ status = ice_add_prof_attrib(prof, ptg, ptype,
+ attr, attr_cnt);
+ if (status == -ENOSPC)
+ break;
+ if (status) {
+ /* This is simply a PTYPE/PTG with no
+ * attribute
+ */
+ prof->ptg[prof->ptg_cnt] = ptg;
+ prof->attr[prof->ptg_cnt].flags = 0;
+ prof->attr[prof->ptg_cnt].mask = 0;
+
+ if (++prof->ptg_cnt >=
+ ICE_MAX_PTG_PER_PROFILE)
+ break;
+ }
+ }
+
+ bytes--;
+ byte++;
+ }
+
+ list_add(&prof->list, &hw->blk[blk].es.prof_map);
+ status = 0;
+
+err_ice_add_prof:
+ mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+ return status;
+}
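+
+/* Usage sketch (illustrative, not part of the driver): register a profile
+ * matching IPv4/TCP payload packets. The ptypes argument is a bitmap of
+ * ICE_FLOW_PTYPE_MAX bits; prof_handle, es and masks stand in for a caller
+ * chosen tracking ID and a previously built extraction sequence:
+ *
+ *	u8 ptypes[DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE)] = {};
+ *
+ *	ptypes[ICE_PTYPE_IPV4_TCP_PAY / BITS_PER_BYTE] |=
+ *		BIT(ICE_PTYPE_IPV4_TCP_PAY % BITS_PER_BYTE);
+ *	status = ice_add_prof(hw, ICE_BLK_RSS, prof_handle, ptypes,
+ *			      NULL, 0, es, masks);
+ */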
+
+/**
+ * ice_search_prof_id - Search for a profile tracking ID
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ *
+ * This will search for a profile tracking ID which was previously added.
+ * The profile map lock should be held before calling this function.
+ */
+static struct ice_prof_map *
+ice_search_prof_id(struct ice_hw *hw, enum ice_block blk, u64 id)
+{
+ struct ice_prof_map *entry = NULL;
+ struct ice_prof_map *map;
+
+ list_for_each_entry(map, &hw->blk[blk].es.prof_map, list)
+ if (map->profile_cookie == id) {
+ entry = map;
+ break;
+ }
+
+ return entry;
+}
+
+/**
+ * ice_vsig_prof_id_count - count profiles in a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: VSIG to count the profiles of
+ */
+static u16
+ice_vsig_prof_id_count(struct ice_hw *hw, enum ice_block blk, u16 vsig)
+{
+ u16 idx = vsig & ICE_VSIG_IDX_M, count = 0;
+ struct ice_vsig_prof *p;
+
+ list_for_each_entry(p, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+ list)
+ count++;
+
+ return count;
+}
+
+/**
+ * ice_rel_tcam_idx - release a TCAM index
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @idx: the index to release
+ */
+static int ice_rel_tcam_idx(struct ice_hw *hw, enum ice_block blk, u16 idx)
+{
+ /* Masks to invoke a never match entry */
+ u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+ u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFE, 0xFF, 0xFF, 0xFF, 0xFF };
+ u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x01, 0x00, 0x00, 0x00, 0x00 };
+ int status;
+
+ /* write the TCAM entry */
+ status = ice_tcam_write_entry(hw, blk, idx, 0, 0, 0, 0, 0, vl_msk,
+ dc_msk, nm_msk);
+ if (status)
+ return status;
+
+ /* release the TCAM entry */
+ status = ice_free_tcam_ent(hw, blk, idx);
+
+ return status;
+}
+
+/**
+ * ice_rem_prof_id - remove one profile from a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof: pointer to profile structure to remove
+ */
+static int
+ice_rem_prof_id(struct ice_hw *hw, enum ice_block blk,
+ struct ice_vsig_prof *prof)
+{
+ int status;
+ u16 i;
+
+ for (i = 0; i < prof->tcam_count; i++)
+ if (prof->tcam[i].in_use) {
+ prof->tcam[i].in_use = false;
+ status = ice_rel_tcam_idx(hw, blk,
+ prof->tcam[i].tcam_idx);
+ if (status)
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_rem_vsig - remove VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: the VSIG to remove
+ * @chg: the change list
+ */
+static int
+ice_rem_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig,
+ struct list_head *chg)
+{
+ u16 idx = vsig & ICE_VSIG_IDX_M;
+ struct ice_vsig_vsi *vsi_cur;
+ struct ice_vsig_prof *d, *t;
+ int status;
+
+ /* remove TCAM entries */
+ list_for_each_entry_safe(d, t,
+ &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+ list) {
+ status = ice_rem_prof_id(hw, blk, d);
+ if (status)
+ return status;
+
+ list_del(&d->list);
+ devm_kfree(ice_hw_to_dev(hw), d);
+ }
+
+ /* Move all VSIs associated with this VSIG to the default VSIG */
+ vsi_cur = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+ /* If the VSIG has at least 1 VSI then iterate through the list
+ * and remove the VSIs before deleting the group.
+ */
+ if (vsi_cur)
+ do {
+ struct ice_vsig_vsi *tmp = vsi_cur->next_vsi;
+ struct ice_chs_chg *p;
+
+ p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p),
+ GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ p->type = ICE_VSIG_REM;
+ p->orig_vsig = vsig;
+ p->vsig = ICE_DEFAULT_VSIG;
+ p->vsi = vsi_cur - hw->blk[blk].xlt2.vsis;
+
+ list_add(&p->list_entry, chg);
+
+ vsi_cur = tmp;
+ } while (vsi_cur);
+
+ return ice_vsig_free(hw, blk, vsig);
+}
+
+/**
+ * ice_rem_prof_id_vsig - remove a specific profile from a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: VSIG to remove the profile from
+ * @hdl: profile handle indicating which profile to remove
+ * @chg: list to receive a record of changes
+ */
+static int
+ice_rem_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
+ struct list_head *chg)
+{
+ u16 idx = vsig & ICE_VSIG_IDX_M;
+ struct ice_vsig_prof *p, *t;
+ int status;
+
+ list_for_each_entry_safe(p, t,
+ &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+ list)
+ if (p->profile_cookie == hdl) {
+ if (ice_vsig_prof_id_count(hw, blk, vsig) == 1)
+ /* this is the last profile, remove the VSIG */
+ return ice_rem_vsig(hw, blk, vsig, chg);
+
+ status = ice_rem_prof_id(hw, blk, p);
+ if (!status) {
+ list_del(&p->list);
+ devm_kfree(ice_hw_to_dev(hw), p);
+ }
+ return status;
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * ice_rem_flow_all - remove all flows with a particular profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ */
+static int ice_rem_flow_all(struct ice_hw *hw, enum ice_block blk, u64 id)
+{
+ struct ice_chs_chg *del, *tmp;
+ struct list_head chg;
+ int status;
+ u16 i;
+
+ INIT_LIST_HEAD(&chg);
+
+ for (i = 1; i < ICE_MAX_VSIGS; i++)
+ if (hw->blk[blk].xlt2.vsig_tbl[i].in_use) {
+ if (ice_has_prof_vsig(hw, blk, i, id)) {
+ status = ice_rem_prof_id_vsig(hw, blk, i, id,
+ &chg);
+ if (status)
+ goto err_ice_rem_flow_all;
+ }
+ }
+
+ status = ice_upd_prof_hw(hw, blk, &chg);
+
+err_ice_rem_flow_all:
+ list_for_each_entry_safe(del, tmp, &chg, list_entry) {
+ list_del(&del->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), del);
+ }
+
+ return status;
+}
+
+/**
+ * ice_rem_prof - remove profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ *
+ * This will remove the profile specified by the ID parameter, which was
+ * previously created through ice_add_prof. If any existing entries
+ * are associated with this profile, they will be removed as well.
+ */
+int ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id)
+{
+ struct ice_prof_map *pmap;
+ int status;
+
+ mutex_lock(&hw->blk[blk].es.prof_map_lock);
+
+ pmap = ice_search_prof_id(hw, blk, id);
+ if (!pmap) {
+ status = -ENOENT;
+ goto err_ice_rem_prof;
+ }
+
+ /* remove all flows with this profile */
+ status = ice_rem_flow_all(hw, blk, pmap->profile_cookie);
+ if (status)
+ goto err_ice_rem_prof;
+
+ /* dereference profile, and possibly remove */
+ ice_prof_dec_ref(hw, blk, pmap->prof_id);
+
+ list_del(&pmap->list);
+ devm_kfree(ice_hw_to_dev(hw), pmap);
+
+err_ice_rem_prof:
+ mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+ return status;
+}
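+
+/* Usage sketch (illustrative, not part of the driver): a profile registered
+ * with ice_add_prof() is torn down with the same tracking ID; flows that
+ * still reference it are removed first via ice_rem_flow_all() above, and
+ * -ENOENT means the ID was never added or was already removed:
+ *
+ *	status = ice_rem_prof(hw, ICE_BLK_RSS, prof_handle);
+ */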
+
+/**
+ * ice_get_prof - get profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @hdl: profile handle
+ * @chg: change list
+ */
+static int
+ice_get_prof(struct ice_hw *hw, enum ice_block blk, u64 hdl,
+ struct list_head *chg)
+{
+ struct ice_prof_map *map;
+ struct ice_chs_chg *p;
+ int status = 0;
+ u16 i;
+
+ mutex_lock(&hw->blk[blk].es.prof_map_lock);
+ /* Get the details on the profile specified by the handle ID */
+ map = ice_search_prof_id(hw, blk, hdl);
+ if (!map) {
+ status = -ENOENT;
+ goto err_ice_get_prof;
+ }
+
+ for (i = 0; i < map->ptg_cnt; i++)
+ if (!hw->blk[blk].es.written[map->prof_id]) {
+ /* add ES to change list */
+ p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p),
+ GFP_KERNEL);
+ if (!p) {
+ status = -ENOMEM;
+ goto err_ice_get_prof;
+ }
+
+ p->type = ICE_PTG_ES_ADD;
+ p->ptype = 0;
+ p->ptg = map->ptg[i];
+ p->add_ptg = 0;
+
+ p->add_prof = 1;
+ p->prof_id = map->prof_id;
+
+ hw->blk[blk].es.written[map->prof_id] = true;
+
+ list_add(&p->list_entry, chg);
+ }
+
+err_ice_get_prof:
+ mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+ /* let caller clean up the change list */
+ return status;
+}
+
+/**
+ * ice_get_profs_vsig - get a copy of the list of profiles from a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: VSIG from which to copy the list
+ * @lst: output list
+ *
+ * This routine makes a copy of the list of profiles in the specified VSIG.
+ */
+static int
+ice_get_profs_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig,
+ struct list_head *lst)
+{
+ struct ice_vsig_prof *ent1, *ent2;
+ u16 idx = vsig & ICE_VSIG_IDX_M;
+
+ list_for_each_entry(ent1, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+ list) {
+ struct ice_vsig_prof *p;
+
+ /* copy to the input list */
+ p = devm_kmemdup(ice_hw_to_dev(hw), ent1, sizeof(*p),
+ GFP_KERNEL);
+ if (!p)
+ goto err_ice_get_profs_vsig;
+
+ list_add_tail(&p->list, lst);
+ }
+
+ return 0;
+
+err_ice_get_profs_vsig:
+ list_for_each_entry_safe(ent1, ent2, lst, list) {
+ list_del(&ent1->list);
+ devm_kfree(ice_hw_to_dev(hw), ent1);
+ }
+
+ return -ENOMEM;
+}
+
+/**
+ * ice_add_prof_to_lst - add profile entry to a list
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @lst: the list to be added to
+ * @hdl: profile handle of entry to add
+ */
+static int
+ice_add_prof_to_lst(struct ice_hw *hw, enum ice_block blk,
+ struct list_head *lst, u64 hdl)
+{
+ struct ice_prof_map *map;
+ struct ice_vsig_prof *p;
+ int status = 0;
+ u16 i;
+
+ mutex_lock(&hw->blk[blk].es.prof_map_lock);
+ map = ice_search_prof_id(hw, blk, hdl);
+ if (!map) {
+ status = -ENOENT;
+ goto err_ice_add_prof_to_lst;
+ }
+
+ p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ status = -ENOMEM;
+ goto err_ice_add_prof_to_lst;
+ }
+
+ p->profile_cookie = map->profile_cookie;
+ p->prof_id = map->prof_id;
+ p->tcam_count = map->ptg_cnt;
+
+ for (i = 0; i < map->ptg_cnt; i++) {
+ p->tcam[i].prof_id = map->prof_id;
+ p->tcam[i].tcam_idx = ICE_INVALID_TCAM;
+ p->tcam[i].ptg = map->ptg[i];
+ }
+
+ list_add(&p->list, lst);
+
+err_ice_add_prof_to_lst:
+ mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+ return status;
+}
+
+/**
+ * ice_move_vsi - move VSI to another VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the VSI to move
+ * @vsig: the VSIG to move the VSI to
+ * @chg: the change list
+ */
+static int
+ice_move_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig,
+ struct list_head *chg)
+{
+ struct ice_chs_chg *p;
+ u16 orig_vsig;
+ int status;
+
+ p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ status = ice_vsig_find_vsi(hw, blk, vsi, &orig_vsig);
+ if (!status)
+ status = ice_vsig_add_mv_vsi(hw, blk, vsi, vsig);
+
+ if (status) {
+ devm_kfree(ice_hw_to_dev(hw), p);
+ return status;
+ }
+
+ p->type = ICE_VSI_MOVE;
+ p->vsi = vsi;
+ p->orig_vsig = orig_vsig;
+ p->vsig = vsig;
+
+ list_add(&p->list_entry, chg);
+
+ return 0;
+}
+
+/**
+ * ice_rem_chg_tcam_ent - remove a specific TCAM entry from change list
+ * @hw: pointer to the HW struct
+ * @idx: the index of the TCAM entry to remove
+ * @chg: the list of change structures to search
+ */
+static void
+ice_rem_chg_tcam_ent(struct ice_hw *hw, u16 idx, struct list_head *chg)
+{
+ struct ice_chs_chg *pos, *tmp;
+
+ list_for_each_entry_safe(tmp, pos, chg, list_entry)
+ if (tmp->type == ICE_TCAM_ADD && tmp->tcam_idx == idx) {
+ list_del(&tmp->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), tmp);
+ }
+}
+
+/**
+ * ice_prof_tcam_ena_dis - add enable or disable TCAM change
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @enable: true to enable, false to disable
+ * @vsig: the VSIG of the TCAM entry
+ * @tcam: pointer to the TCAM info structure of the TCAM to enable or disable
+ * @chg: the change list
+ *
+ * This function appends an enable or disable TCAM entry to the change log
+ */
+static int
+ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable,
+ u16 vsig, struct ice_tcam_inf *tcam,
+ struct list_head *chg)
+{
+ struct ice_chs_chg *p;
+ int status;
+
+ u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+ u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0x00, 0x00, 0x00 };
+ u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x00, 0x00, 0x00, 0x00, 0x00 };
+
+ /* if disabling, free the TCAM */
+ if (!enable) {
+ status = ice_rel_tcam_idx(hw, blk, tcam->tcam_idx);
+
+ /* if we have already created a change for this TCAM entry, then
+ * we need to remove that entry to avoid writing to a TCAM entry
+ * we will no longer own.
+ */
+ ice_rem_chg_tcam_ent(hw, tcam->tcam_idx, chg);
+ tcam->tcam_idx = 0;
+ tcam->in_use = 0;
+ return status;
+ }
+
+ /* for re-enabling, reallocate a TCAM */
+ /* for entries with empty attribute masks, allocate entry from
+ * the bottom of the TCAM table; otherwise, allocate from the
+ * top of the table in order to give it higher priority
+ */
+ status = ice_alloc_tcam_ent(hw, blk, tcam->attr.mask == 0,
+ &tcam->tcam_idx);
+ if (status)
+ return status;
+
+ /* add TCAM to change list */
+ p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ status = ice_tcam_write_entry(hw, blk, tcam->tcam_idx, tcam->prof_id,
+ tcam->ptg, vsig, 0, tcam->attr.flags,
+ vl_msk, dc_msk, nm_msk);
+ if (status)
+ goto err_ice_prof_tcam_ena_dis;
+
+ tcam->in_use = 1;
+
+ p->type = ICE_TCAM_ADD;
+ p->add_tcam_idx = true;
+ p->prof_id = tcam->prof_id;
+ p->ptg = tcam->ptg;
+ p->vsig = 0;
+ p->tcam_idx = tcam->tcam_idx;
+
+ /* log change */
+ list_add(&p->list_entry, chg);
+
+ return 0;
+
+err_ice_prof_tcam_ena_dis:
+ devm_kfree(ice_hw_to_dev(hw), p);
+ return status;
+}
+
+/**
+ * ice_adj_prof_priorities - adjust profile based on priorities
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: the VSIG for which to adjust profile priorities
+ * @chg: the change list
+ */
+static int
+ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig,
+ struct list_head *chg)
+{
+ DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT);
+ struct ice_vsig_prof *t;
+ int status;
+ u16 idx;
+
+ bitmap_zero(ptgs_used, ICE_XLT1_CNT);
+ idx = vsig & ICE_VSIG_IDX_M;
+
+ /* Priority is based on the order in which the profiles are added. The
+ * newest added profile has highest priority and the oldest added
+ * profile has the lowest priority. Since the profile property list for
+ * a VSIG is sorted from newest to oldest, this code traverses the list
+ * in order and enables the first of each PTG that it finds (that is not
+ * already enabled); it also disables any duplicate PTGs that it finds
+ * in the older profiles (that are currently enabled).
+ */
+
+ list_for_each_entry(t, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+ list) {
+ u16 i;
+
+ for (i = 0; i < t->tcam_count; i++) {
+ /* Scan the priorities from newest to oldest.
+ * Make sure that the newest profiles take priority.
+ */
+ if (test_bit(t->tcam[i].ptg, ptgs_used) &&
+ t->tcam[i].in_use) {
+ /* need to mark this PTG as never match, as it
+ * was already in use and therefore duplicate
+ * (and lower priority)
+ */
+ status = ice_prof_tcam_ena_dis(hw, blk, false,
+ vsig,
+ &t->tcam[i],
+ chg);
+ if (status)
+ return status;
+ } else if (!test_bit(t->tcam[i].ptg, ptgs_used) &&
+ !t->tcam[i].in_use) {
+ /* need to enable this PTG, as it is not in use
+ * and not enabled (highest priority)
+ */
+ status = ice_prof_tcam_ena_dis(hw, blk, true,
+ vsig,
+ &t->tcam[i],
+ chg);
+ if (status)
+ return status;
+ }
+
+ /* keep track of used ptgs */
+ __set_bit(t->tcam[i].ptg, ptgs_used);
+ }
+ }
+
+ return 0;
+}
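+
+/* Worked example (informational): suppose a VSIG's property list holds
+ * profile B (newest) and profile A (oldest), both carrying a TCAM entry for
+ * PTG 5. Walking the list newest-first, B's entry is enabled and PTG 5 is
+ * marked used; when A's entry for PTG 5 is reached, it is detected as a
+ * duplicate and disabled, so the newest profile wins on overlap.
+ */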
+
+/**
+ * ice_add_prof_id_vsig - add profile to VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: the VSIG to which this profile is to be added
+ * @hdl: the profile handle indicating the profile to add
+ * @rev: true to add entries to the end of the list
+ * @chg: the change list
+ */
+static int
+ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
+ bool rev, struct list_head *chg)
+{
+ /* Masks that ignore flags */
+ u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+ u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0x00, 0x00, 0x00 };
+ u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x00, 0x00, 0x00, 0x00, 0x00 };
+ struct ice_prof_map *map;
+ struct ice_vsig_prof *t;
+ struct ice_chs_chg *p;
+ u16 vsig_idx, i;
+ int status = 0;
+
+ /* Error, if this VSIG already has this profile */
+ if (ice_has_prof_vsig(hw, blk, vsig, hdl))
+ return -EEXIST;
+
+ /* new VSIG profile structure */
+ t = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return -ENOMEM;
+
+ mutex_lock(&hw->blk[blk].es.prof_map_lock);
+ /* Get the details on the profile specified by the handle ID */
+ map = ice_search_prof_id(hw, blk, hdl);
+ if (!map) {
+ status = -ENOENT;
+ goto err_ice_add_prof_id_vsig;
+ }
+
+ t->profile_cookie = map->profile_cookie;
+ t->prof_id = map->prof_id;
+ t->tcam_count = map->ptg_cnt;
+
+ /* create TCAM entries */
+ for (i = 0; i < map->ptg_cnt; i++) {
+ u16 tcam_idx;
+
+ /* add TCAM to change list */
+ p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ status = -ENOMEM;
+ goto err_ice_add_prof_id_vsig;
+ }
+
+ /* allocate the TCAM entry index */
+ /* for entries with empty attribute masks, allocate entry from
+ * the bottom of the TCAM table; otherwise, allocate from the
+ * top of the table in order to give it higher priority
+ */
+ status = ice_alloc_tcam_ent(hw, blk, map->attr[i].mask == 0,
+ &tcam_idx);
+ if (status) {
+ devm_kfree(ice_hw_to_dev(hw), p);
+ goto err_ice_add_prof_id_vsig;
+ }
+
+ t->tcam[i].ptg = map->ptg[i];
+ t->tcam[i].prof_id = map->prof_id;
+ t->tcam[i].tcam_idx = tcam_idx;
+ t->tcam[i].attr = map->attr[i];
+ t->tcam[i].in_use = true;
+
+ p->type = ICE_TCAM_ADD;
+ p->add_tcam_idx = true;
+ p->prof_id = t->tcam[i].prof_id;
+ p->ptg = t->tcam[i].ptg;
+ p->vsig = vsig;
+ p->tcam_idx = t->tcam[i].tcam_idx;
+
+ /* write the TCAM entry */
+ status = ice_tcam_write_entry(hw, blk, t->tcam[i].tcam_idx,
+ t->tcam[i].prof_id,
+ t->tcam[i].ptg, vsig, 0, 0,
+ vl_msk, dc_msk, nm_msk);
+ if (status) {
+ devm_kfree(ice_hw_to_dev(hw), p);
+ goto err_ice_add_prof_id_vsig;
+ }
+
+ /* log change */
+ list_add(&p->list_entry, chg);
+ }
+
+ /* add profile to VSIG */
+ vsig_idx = vsig & ICE_VSIG_IDX_M;
+ if (rev)
+ list_add_tail(&t->list,
+ &hw->blk[blk].xlt2.vsig_tbl[vsig_idx].prop_lst);
+ else
+ list_add(&t->list,
+ &hw->blk[blk].xlt2.vsig_tbl[vsig_idx].prop_lst);
+
+ mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+ return status;
+
+err_ice_add_prof_id_vsig:
+ mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+ /* let caller clean up the change list */
+ devm_kfree(ice_hw_to_dev(hw), t);
+ return status;
+}
+
+/**
+ * ice_create_prof_id_vsig - add a new VSIG with a single profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the initial VSI that will be in VSIG
+ * @hdl: the profile handle of the profile that will be added to the VSIG
+ * @chg: the change list
+ */
+static int
+ice_create_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl,
+ struct list_head *chg)
+{
+ struct ice_chs_chg *p;
+ u16 new_vsig;
+ int status;
+
+ p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ new_vsig = ice_vsig_alloc(hw, blk);
+ if (!new_vsig) {
+ status = -EIO;
+ goto err_ice_create_prof_id_vsig;
+ }
+
+ status = ice_move_vsi(hw, blk, vsi, new_vsig, chg);
+ if (status)
+ goto err_ice_create_prof_id_vsig;
+
+ status = ice_add_prof_id_vsig(hw, blk, new_vsig, hdl, false, chg);
+ if (status)
+ goto err_ice_create_prof_id_vsig;
+
+ p->type = ICE_VSIG_ADD;
+ p->vsi = vsi;
+ p->orig_vsig = ICE_DEFAULT_VSIG;
+ p->vsig = new_vsig;
+
+ list_add(&p->list_entry, chg);
+
+ return 0;
+
+err_ice_create_prof_id_vsig:
+ /* let caller clean up the change list */
+ devm_kfree(ice_hw_to_dev(hw), p);
+ return status;
+}
+
+/**
+ * ice_create_vsig_from_lst - create a new VSIG with a list of profiles
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the initial VSI that will be in VSIG
+ * @lst: the list of profiles that will be added to the VSIG
+ * @new_vsig: return of new VSIG
+ * @chg: the change list
+ */
+static int
+ice_create_vsig_from_lst(struct ice_hw *hw, enum ice_block blk, u16 vsi,
+ struct list_head *lst, u16 *new_vsig,
+ struct list_head *chg)
+{
+ struct ice_vsig_prof *t;
+ int status;
+ u16 vsig;
+
+ vsig = ice_vsig_alloc(hw, blk);
+ if (!vsig)
+ return -EIO;
+
+ status = ice_move_vsi(hw, blk, vsi, vsig, chg);
+ if (status)
+ return status;
+
+ list_for_each_entry(t, lst, list) {
+ /* Reverse the order here since we are copying the list */
+ status = ice_add_prof_id_vsig(hw, blk, vsig, t->profile_cookie,
+ true, chg);
+ if (status)
+ return status;
+ }
+
+ *new_vsig = vsig;
+
+ return 0;
+}
+
+/**
+ * ice_find_prof_vsig - find a VSIG with a specific profile handle
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @hdl: the profile handle of the profile to search for
+ * @vsig: returns the VSIG with the matching profile
+ */
+static bool
+ice_find_prof_vsig(struct ice_hw *hw, enum ice_block blk, u64 hdl, u16 *vsig)
+{
+ struct ice_vsig_prof *t;
+ struct list_head lst;
+ int status;
+
+ INIT_LIST_HEAD(&lst);
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return false;
+
+ t->profile_cookie = hdl;
+ list_add(&t->list, &lst);
+
+ status = ice_find_dup_props_vsig(hw, blk, &lst, vsig);
+
+ list_del(&t->list);
+ kfree(t);
+
+ return !status;
+}
+
+/**
+ * ice_add_prof_id_flow - add profile flow
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the VSI to enable with the profile specified by ID
+ * @hdl: profile handle
+ *
+ * Calling this function will update the hardware tables to enable the
+ * profile indicated by the ID parameter for the VSI specified. Once
+ * successfully called, the flow will be enabled.
+ */
+int
+ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl)
+{
+ struct ice_vsig_prof *tmp1, *del1;
+ struct ice_chs_chg *tmp, *del;
+ struct list_head union_lst;
+ struct list_head chg;
+ int status;
+ u16 vsig;
+
+ INIT_LIST_HEAD(&union_lst);
+ INIT_LIST_HEAD(&chg);
+
+ /* Get profile */
+ status = ice_get_prof(hw, blk, hdl, &chg);
+ if (status)
+ return status;
+
+ /* determine if VSI is already part of a VSIG */
+ status = ice_vsig_find_vsi(hw, blk, vsi, &vsig);
+ if (!status && vsig) {
+ bool only_vsi;
+ u16 or_vsig;
+ u16 ref;
+
+ /* found in VSIG */
+ or_vsig = vsig;
+
+ /* make sure that there is no overlap/conflict between the new
+ * characteristics and the existing ones; we don't support that
+ * scenario
+ */
+ if (ice_has_prof_vsig(hw, blk, vsig, hdl)) {
+ status = -EEXIST;
+ goto err_ice_add_prof_id_flow;
+ }
+
+ /* last VSI in the VSIG? */
+ status = ice_vsig_get_ref(hw, blk, vsig, &ref);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+ only_vsi = (ref == 1);
+
+ /* create a union of the current profiles and the one being
+ * added
+ */
+ status = ice_get_profs_vsig(hw, blk, vsig, &union_lst);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+
+ status = ice_add_prof_to_lst(hw, blk, &union_lst, hdl);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+
+ /* search for an existing VSIG with an exact characteristic match */
+ status = ice_find_dup_props_vsig(hw, blk, &union_lst, &vsig);
+ if (!status) {
+ /* move VSI to the VSIG that matches */
+ status = ice_move_vsi(hw, blk, vsi, vsig, &chg);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+
+ /* VSI has been moved out of or_vsig. If the or_vsig had
+ * only that VSI it is now empty and can be removed.
+ */
+ if (only_vsi) {
+ status = ice_rem_vsig(hw, blk, or_vsig, &chg);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+ }
+ } else if (only_vsi) {
+ /* If the original VSIG only contains one VSI, then it
+ * will be the requesting VSI. In this case the VSI is
+ * not sharing entries and we can simply add the new
+ * profile to the VSIG.
+ */
+ status = ice_add_prof_id_vsig(hw, blk, vsig, hdl, false,
+ &chg);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+
+ /* Adjust priorities */
+ status = ice_adj_prof_priorities(hw, blk, vsig, &chg);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+ } else {
+ /* No match, so we need a new VSIG */
+ status = ice_create_vsig_from_lst(hw, blk, vsi,
+ &union_lst, &vsig,
+ &chg);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+
+ /* Adjust priorities */
+ status = ice_adj_prof_priorities(hw, blk, vsig, &chg);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+ }
+ } else {
+ /* need to find or add a VSIG */
+ /* search for an existing VSIG with an exact characteristic match */
+ if (ice_find_prof_vsig(hw, blk, hdl, &vsig)) {
+ /* found an exact match */
+ /* add or move VSI to the VSIG that matches */
+ status = ice_move_vsi(hw, blk, vsi, vsig, &chg);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+ } else {
+ /* we did not find an exact match, so we need to add a VSIG */
+ status = ice_create_prof_id_vsig(hw, blk, vsi, hdl,
+ &chg);
+ if (status)
+ goto err_ice_add_prof_id_flow;
+ }
+ }
+
+ /* update hardware */
+ if (!status)
+ status = ice_upd_prof_hw(hw, blk, &chg);
+
+err_ice_add_prof_id_flow:
+ list_for_each_entry_safe(del, tmp, &chg, list_entry) {
+ list_del(&del->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), del);
+ }
+
+ list_for_each_entry_safe(del1, tmp1, &union_lst, list) {
+ list_del(&del1->list);
+ devm_kfree(ice_hw_to_dev(hw), del1);
+ }
+
+ return status;
+}
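+
+/* Usage sketch (illustrative, not part of the driver): a flow is enabled for
+ * a VSI and later disabled with the same profile handle; vsi_num and
+ * prof_handle are placeholders:
+ *
+ *	status = ice_add_prof_id_flow(hw, ICE_BLK_RSS, vsi_num, prof_handle);
+ *	...
+ *	status = ice_rem_prof_id_flow(hw, ICE_BLK_RSS, vsi_num, prof_handle);
+ */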
+
+/**
+ * ice_rem_prof_from_list - remove a profile from list
+ * @hw: pointer to the HW struct
+ * @lst: list to remove the profile from
+ * @hdl: the profile handle indicating the profile to remove
+ */
+static int
+ice_rem_prof_from_list(struct ice_hw *hw, struct list_head *lst, u64 hdl)
+{
+ struct ice_vsig_prof *ent, *tmp;
+
+ list_for_each_entry_safe(ent, tmp, lst, list)
+ if (ent->profile_cookie == hdl) {
+ list_del(&ent->list);
+ devm_kfree(ice_hw_to_dev(hw), ent);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * ice_rem_prof_id_flow - remove flow
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the VSI from which to remove the profile specified by ID
+ * @hdl: profile tracking handle
+ *
+ * Calling this function will update the hardware tables to remove the
+ * profile indicated by the ID parameter for the VSI specified. Once
+ * successfully called, the flow will be disabled.
+ */
+int
+ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl)
+{
+ struct ice_vsig_prof *tmp1, *del1;
+ struct ice_chs_chg *tmp, *del;
+ struct list_head chg, copy;
+ int status;
+ u16 vsig;
+
+ INIT_LIST_HEAD(&copy);
+ INIT_LIST_HEAD(&chg);
+
+ /* determine if VSI is already part of a VSIG */
+ status = ice_vsig_find_vsi(hw, blk, vsi, &vsig);
+ if (!status && vsig) {
+ bool last_profile;
+ bool only_vsi;
+ u16 ref;
+
+ /* found in VSIG */
+ last_profile = ice_vsig_prof_id_count(hw, blk, vsig) == 1;
+ status = ice_vsig_get_ref(hw, blk, vsig, &ref);
+ if (status)
+ goto err_ice_rem_prof_id_flow;
+ only_vsi = (ref == 1);
+
+ if (only_vsi) {
+ /* If the original VSIG only contains one reference,
+ * which will be the requesting VSI, then the VSI is not
+ * sharing entries and we can simply remove the specific
+ * characteristics from the VSIG.
+ */
+
+ if (last_profile) {
+ /* If there are no profiles left for this VSIG,
+ * then simply remove the VSIG.
+ */
+ status = ice_rem_vsig(hw, blk, vsig, &chg);
+ if (status)
+ goto err_ice_rem_prof_id_flow;
+ } else {
+ status = ice_rem_prof_id_vsig(hw, blk, vsig,
+ hdl, &chg);
+ if (status)
+ goto err_ice_rem_prof_id_flow;
+
+ /* Adjust priorities */
+ status = ice_adj_prof_priorities(hw, blk, vsig,
+ &chg);
+ if (status)
+ goto err_ice_rem_prof_id_flow;
+ }
+
+ } else {
+ /* Make a copy of the VSIG's list of Profiles */
+ status = ice_get_profs_vsig(hw, blk, vsig, &copy);
+ if (status)
+ goto err_ice_rem_prof_id_flow;
+
+ /* Remove specified profile entry from the list */
+ status = ice_rem_prof_from_list(hw, &copy, hdl);
+ if (status)
+ goto err_ice_rem_prof_id_flow;
+
+ if (list_empty(&copy)) {
+ status = ice_move_vsi(hw, blk, vsi,
+ ICE_DEFAULT_VSIG, &chg);
+ if (status)
+ goto err_ice_rem_prof_id_flow;
+
+ } else if (!ice_find_dup_props_vsig(hw, blk, &copy,
+ &vsig)) {
+ /* found a VSIG with a matching profile
+ * list; move the VSI to that VSIG
+ */
+ status = ice_move_vsi(hw, blk, vsi, vsig, &chg);
+ if (status)
+ goto err_ice_rem_prof_id_flow;
+ } else {
+ /* since no existing VSIG supports this
+ * characteristic pattern, we need to create a
+ * new VSIG and TCAM entries
+ */
+ status = ice_create_vsig_from_lst(hw, blk, vsi,
+ &copy, &vsig,
+ &chg);
+ if (status)
+ goto err_ice_rem_prof_id_flow;
+
+ /* Adjust priorities */
+ status = ice_adj_prof_priorities(hw, blk, vsig,
+ &chg);
+ if (status)
+ goto err_ice_rem_prof_id_flow;
+ }
+ }
+ } else {
+ status = -ENOENT;
+ }
+
+ /* update hardware tables */
+ if (!status)
+ status = ice_upd_prof_hw(hw, blk, &chg);
+
+err_ice_rem_prof_id_flow:
+ list_for_each_entry_safe(del, tmp, &chg, list_entry) {
+ list_del(&del->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), del);
+ }
+
+ list_for_each_entry_safe(del1, tmp1, &copy, list) {
+ list_del(&del1->list);
+ devm_kfree(ice_hw_to_dev(hw), del1);
+ }
+
+ return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
new file mode 100644
index 000000000..9c530c867
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLEX_PIPE_H_
+#define _ICE_FLEX_PIPE_H_
+
+#include "ice_type.h"
+
+/* Minimum supported package version */
+#define ICE_PKG_SUPP_VER_MAJ 1
+#define ICE_PKG_SUPP_VER_MNR 3
+
+/* Package format version */
+#define ICE_PKG_FMT_VER_MAJ 1
+#define ICE_PKG_FMT_VER_MNR 0
+#define ICE_PKG_FMT_VER_UPD 0
+#define ICE_PKG_FMT_VER_DFT 0
+
+#define ICE_PKG_CNT 4
+
+enum ice_ddp_state {
+ /* Indicates that this call to ice_init_pkg
+ * successfully loaded the requested DDP package
+ */
+ ICE_DDP_PKG_SUCCESS = 0,
+
+ /* Generic error for already-loaded cases; it is mapped later to
+ * the more specific one (one of the next 3)
+ */
+ ICE_DDP_PKG_ALREADY_LOADED = -1,
+
+ /* Indicates that a DDP package of the same version has already been
+ * loaded onto the device by a previous call or by another PF
+ */
+ ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED = -2,
+
+ /* The device has a DDP package that is not supported by the driver */
+ ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED = -3,
+
+ /* The device has a compatible package
+ * (but different from the requested one) already loaded
+ */
+ ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED = -4,
+
+ /* The firmware loaded on the device is not compatible with
+ * the DDP package loaded
+ */
+ ICE_DDP_PKG_FW_MISMATCH = -5,
+
+ /* The DDP package file is invalid */
+ ICE_DDP_PKG_INVALID_FILE = -6,
+
+ /* The version of the DDP package provided is higher than
+ * the driver supports
+ */
+ ICE_DDP_PKG_FILE_VERSION_TOO_HIGH = -7,
+
+ /* The version of the DDP package provided is lower than the
+ * driver supports
+ */
+ ICE_DDP_PKG_FILE_VERSION_TOO_LOW = -8,
+
+ /* The signature of the DDP package file provided is invalid */
+ ICE_DDP_PKG_FILE_SIGNATURE_INVALID = -9,
+
+ /* The DDP package file security revision is too low and not
+ * supported by firmware
+ */
+ ICE_DDP_PKG_FILE_REVISION_TOO_LOW = -10,
+
+ /* An error occurred in firmware while loading the DDP package */
+ ICE_DDP_PKG_LOAD_ERROR = -11,
+
+ /* Other errors */
+ ICE_DDP_PKG_ERR = -12
+};
+
+int
+ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access);
+void ice_release_change_lock(struct ice_hw *hw);
+int
+ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 fv_idx,
+ u8 *prot, u16 *off);
+void
+ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type type,
+ unsigned long *bm);
+void
+ice_init_prof_result_bm(struct ice_hw *hw);
+int
+ice_get_sw_fv_list(struct ice_hw *hw, struct ice_prot_lkup_ext *lkups,
+ unsigned long *bm, struct list_head *fv_list);
+int
+ice_pkg_buf_unreserve_section(struct ice_buf_build *bld, u16 count);
+u16 ice_pkg_buf_get_free_space(struct ice_buf_build *bld);
+int
+ice_aq_upload_section(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+ u16 buf_size, struct ice_sq_cd *cd);
+bool
+ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port,
+ enum ice_tunnel_type type);
+int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
+ unsigned int idx, struct udp_tunnel_info *ti);
+int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
+ unsigned int idx, struct udp_tunnel_info *ti);
+int ice_set_dvm_boost_entries(struct ice_hw *hw);
+
+/* Rx parser PTYPE functions */
+bool ice_hw_ptype_ena(struct ice_hw *hw, u16 ptype);
+
+/* XLT2/VSI group functions */
+int
+ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
+ const struct ice_ptype_attributes *attr, u16 attr_cnt,
+ struct ice_fv_word *es, u16 *masks);
+int
+ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl);
+int
+ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl);
+enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buff, u32 len);
+enum ice_ddp_state
+ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len);
+bool ice_is_init_pkg_successful(enum ice_ddp_state state);
+int ice_init_hw_tbls(struct ice_hw *hw);
+void ice_free_seg(struct ice_hw *hw);
+void ice_fill_blk_tbls(struct ice_hw *hw);
+void ice_clear_hw_tbls(struct ice_hw *hw);
+void ice_free_hw_tbls(struct ice_hw *hw);
+int ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id);
+struct ice_buf_build *
+ice_pkg_buf_alloc_single_section(struct ice_hw *hw, u32 type, u16 size,
+ void **section);
+struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld);
+void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld);
+
+#endif /* _ICE_FLEX_PIPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
new file mode 100644
index 000000000..974d14a83
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -0,0 +1,709 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLEX_TYPE_H_
+#define _ICE_FLEX_TYPE_H_
+
+#define ICE_FV_OFFSET_INVAL 0x1FF
+
+/* Extraction Sequence (Field Vector) Table */
+struct ice_fv_word {
+ u8 prot_id;
+ u16 off; /* Offset within the protocol header */
+ u8 resvrd;
+} __packed;
+
+#define ICE_MAX_NUM_PROFILES 256
+
+#define ICE_MAX_FV_WORDS 48
+struct ice_fv {
+ struct ice_fv_word ew[ICE_MAX_FV_WORDS];
+};
+
+/* Package and segment headers and tables */
+struct ice_pkg_hdr {
+ struct ice_pkg_ver pkg_format_ver;
+ __le32 seg_count;
+ __le32 seg_offset[];
+};
+
+/* generic segment */
+struct ice_generic_seg_hdr {
+#define SEGMENT_TYPE_METADATA 0x00000001
+#define SEGMENT_TYPE_ICE 0x00000010
+ __le32 seg_type;
+ struct ice_pkg_ver seg_format_ver;
+ __le32 seg_size;
+ char seg_id[ICE_PKG_NAME_SIZE];
+};
+
+/* ice specific segment */
+
+union ice_device_id {
+ struct {
+ __le16 device_id;
+ __le16 vendor_id;
+ } dev_vend_id;
+ __le32 id;
+};
+
+struct ice_device_id_entry {
+ union ice_device_id device;
+ union ice_device_id sub_device;
+};
+
+struct ice_seg {
+ struct ice_generic_seg_hdr hdr;
+ __le32 device_table_count;
+ struct ice_device_id_entry device_table[];
+};
+
+struct ice_nvm_table {
+ __le32 table_count;
+ __le32 vers[];
+};
+
+struct ice_buf {
+#define ICE_PKG_BUF_SIZE 4096
+ u8 buf[ICE_PKG_BUF_SIZE];
+};
+
+struct ice_buf_table {
+ __le32 buf_count;
+ struct ice_buf buf_array[];
+};
+
+/* global metadata specific segment */
+struct ice_global_metadata_seg {
+ struct ice_generic_seg_hdr hdr;
+ struct ice_pkg_ver pkg_ver;
+ __le32 rsvd;
+ char pkg_name[ICE_PKG_NAME_SIZE];
+};
+
+#define ICE_MIN_S_OFF 12
+#define ICE_MAX_S_OFF 4095
+#define ICE_MIN_S_SZ 1
+#define ICE_MAX_S_SZ 4084
+
+/* section information */
+struct ice_section_entry {
+ __le32 type;
+ __le16 offset;
+ __le16 size;
+};
+
+#define ICE_MIN_S_COUNT 1
+#define ICE_MAX_S_COUNT 511
+#define ICE_MIN_S_DATA_END 12
+#define ICE_MAX_S_DATA_END 4096
+
+#define ICE_METADATA_BUF 0x80000000
+
+struct ice_buf_hdr {
+ __le16 section_count;
+ __le16 data_end;
+ struct ice_section_entry section_entry[];
+};
+
+#define ICE_MAX_ENTRIES_IN_BUF(hd_sz, ent_sz) ((ICE_PKG_BUF_SIZE - \
+ struct_size((struct ice_buf_hdr *)0, section_entry, 1) - (hd_sz)) /\
+ (ent_sz))
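+
+/* Worked example (informational, assuming no structure padding): the buffer
+ * is ICE_PKG_BUF_SIZE (4096) bytes and struct_size(hdr, section_entry, 1) is
+ * 12 bytes. For label sections (struct ice_label below, 66 bytes per entry,
+ * 2 bytes of extra section header), this gives
+ * (4096 - 12 - 2) / 66 = 61 entries; see ICE_MAX_LABELS_IN_BUF.
+ */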
+
+/* ice package section IDs */
+#define ICE_SID_METADATA 1
+#define ICE_SID_XLT0_SW 10
+#define ICE_SID_XLT_KEY_BUILDER_SW 11
+#define ICE_SID_XLT1_SW 12
+#define ICE_SID_XLT2_SW 13
+#define ICE_SID_PROFID_TCAM_SW 14
+#define ICE_SID_PROFID_REDIR_SW 15
+#define ICE_SID_FLD_VEC_SW 16
+#define ICE_SID_CDID_KEY_BUILDER_SW 17
+
+struct ice_meta_sect {
+ struct ice_pkg_ver ver;
+#define ICE_META_SECT_NAME_SIZE 28
+ char name[ICE_META_SECT_NAME_SIZE];
+ __le32 track_id;
+};
+
+#define ICE_SID_CDID_REDIR_SW 18
+
+#define ICE_SID_XLT0_ACL 20
+#define ICE_SID_XLT_KEY_BUILDER_ACL 21
+#define ICE_SID_XLT1_ACL 22
+#define ICE_SID_XLT2_ACL 23
+#define ICE_SID_PROFID_TCAM_ACL 24
+#define ICE_SID_PROFID_REDIR_ACL 25
+#define ICE_SID_FLD_VEC_ACL 26
+#define ICE_SID_CDID_KEY_BUILDER_ACL 27
+#define ICE_SID_CDID_REDIR_ACL 28
+
+#define ICE_SID_XLT0_FD 30
+#define ICE_SID_XLT_KEY_BUILDER_FD 31
+#define ICE_SID_XLT1_FD 32
+#define ICE_SID_XLT2_FD 33
+#define ICE_SID_PROFID_TCAM_FD 34
+#define ICE_SID_PROFID_REDIR_FD 35
+#define ICE_SID_FLD_VEC_FD 36
+#define ICE_SID_CDID_KEY_BUILDER_FD 37
+#define ICE_SID_CDID_REDIR_FD 38
+
+#define ICE_SID_XLT0_RSS 40
+#define ICE_SID_XLT_KEY_BUILDER_RSS 41
+#define ICE_SID_XLT1_RSS 42
+#define ICE_SID_XLT2_RSS 43
+#define ICE_SID_PROFID_TCAM_RSS 44
+#define ICE_SID_PROFID_REDIR_RSS 45
+#define ICE_SID_FLD_VEC_RSS 46
+#define ICE_SID_CDID_KEY_BUILDER_RSS 47
+#define ICE_SID_CDID_REDIR_RSS 48
+
+#define ICE_SID_RXPARSER_MARKER_PTYPE 55
+#define ICE_SID_RXPARSER_BOOST_TCAM 56
+#define ICE_SID_RXPARSER_METADATA_INIT 58
+#define ICE_SID_TXPARSER_BOOST_TCAM 66
+
+#define ICE_SID_XLT0_PE 80
+#define ICE_SID_XLT_KEY_BUILDER_PE 81
+#define ICE_SID_XLT1_PE 82
+#define ICE_SID_XLT2_PE 83
+#define ICE_SID_PROFID_TCAM_PE 84
+#define ICE_SID_PROFID_REDIR_PE 85
+#define ICE_SID_FLD_VEC_PE 86
+#define ICE_SID_CDID_KEY_BUILDER_PE 87
+#define ICE_SID_CDID_REDIR_PE 88
+
+/* Label Metadata section IDs */
+#define ICE_SID_LBL_FIRST 0x80000010
+#define ICE_SID_LBL_RXPARSER_TMEM 0x80000018
+/* The following define MUST be updated to reflect the last label section ID */
+#define ICE_SID_LBL_LAST 0x80000038
+
+enum ice_block {
+ ICE_BLK_SW = 0,
+ ICE_BLK_ACL,
+ ICE_BLK_FD,
+ ICE_BLK_RSS,
+ ICE_BLK_PE,
+ ICE_BLK_COUNT
+};
+
+enum ice_sect {
+ ICE_XLT0 = 0,
+ ICE_XLT_KB,
+ ICE_XLT1,
+ ICE_XLT2,
+ ICE_PROF_TCAM,
+ ICE_PROF_REDIR,
+ ICE_VEC_TBL,
+ ICE_CDID_KB,
+ ICE_CDID_REDIR,
+ ICE_SECT_COUNT
+};
+
+/* Packet Type (PTYPE) values */
+#define ICE_PTYPE_MAC_PAY 1
+#define ICE_PTYPE_IPV4_PAY 23
+#define ICE_PTYPE_IPV4_UDP_PAY 24
+#define ICE_PTYPE_IPV4_TCP_PAY 26
+#define ICE_PTYPE_IPV4_SCTP_PAY 27
+#define ICE_PTYPE_IPV6_PAY 89
+#define ICE_PTYPE_IPV6_UDP_PAY 90
+#define ICE_PTYPE_IPV6_TCP_PAY 92
+#define ICE_PTYPE_IPV6_SCTP_PAY 93
+#define ICE_MAC_IPV4_ESP 160
+#define ICE_MAC_IPV6_ESP 161
+#define ICE_MAC_IPV4_AH 162
+#define ICE_MAC_IPV6_AH 163
+#define ICE_MAC_IPV4_NAT_T_ESP 164
+#define ICE_MAC_IPV6_NAT_T_ESP 165
+#define ICE_MAC_IPV4_GTPU 329
+#define ICE_MAC_IPV6_GTPU 330
+#define ICE_MAC_IPV4_GTPU_IPV4_FRAG 331
+#define ICE_MAC_IPV4_GTPU_IPV4_PAY 332
+#define ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY 333
+#define ICE_MAC_IPV4_GTPU_IPV4_TCP 334
+#define ICE_MAC_IPV4_GTPU_IPV4_ICMP 335
+#define ICE_MAC_IPV6_GTPU_IPV4_FRAG 336
+#define ICE_MAC_IPV6_GTPU_IPV4_PAY 337
+#define ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY 338
+#define ICE_MAC_IPV6_GTPU_IPV4_TCP 339
+#define ICE_MAC_IPV6_GTPU_IPV4_ICMP 340
+#define ICE_MAC_IPV4_GTPU_IPV6_FRAG 341
+#define ICE_MAC_IPV4_GTPU_IPV6_PAY 342
+#define ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY 343
+#define ICE_MAC_IPV4_GTPU_IPV6_TCP 344
+#define ICE_MAC_IPV4_GTPU_IPV6_ICMPV6 345
+#define ICE_MAC_IPV6_GTPU_IPV6_FRAG 346
+#define ICE_MAC_IPV6_GTPU_IPV6_PAY 347
+#define ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY 348
+#define ICE_MAC_IPV6_GTPU_IPV6_TCP 349
+#define ICE_MAC_IPV6_GTPU_IPV6_ICMPV6 350
+#define ICE_MAC_IPV4_PFCP_SESSION 352
+#define ICE_MAC_IPV6_PFCP_SESSION 354
+#define ICE_MAC_IPV4_L2TPV3 360
+#define ICE_MAC_IPV6_L2TPV3 361
+
+/* Attributes that can modify PTYPE definitions.
+ *
+ * These values will represent special attributes for PTYPEs, which will
+ * resolve into metadata packet flags definitions that can be used in the TCAM
+ * for identifying a PTYPE with specific characteristics.
+ */
+enum ice_ptype_attrib_type {
+ /* GTP PTYPEs */
+ ICE_PTYPE_ATTR_GTP_PDU_EH,
+ ICE_PTYPE_ATTR_GTP_SESSION,
+ ICE_PTYPE_ATTR_GTP_DOWNLINK,
+ ICE_PTYPE_ATTR_GTP_UPLINK,
+};
+
+struct ice_ptype_attrib_info {
+ u16 flags;
+ u16 mask;
+};
+
+/* TCAM flag definitions */
+#define ICE_GTP_PDU BIT(14)
+#define ICE_GTP_PDU_LINK BIT(13)
+
+/* GTP attributes */
+#define ICE_GTP_PDU_FLAG_MASK (ICE_GTP_PDU)
+#define ICE_GTP_PDU_EH ICE_GTP_PDU
+
+#define ICE_GTP_FLAGS_MASK (ICE_GTP_PDU | ICE_GTP_PDU_LINK)
+#define ICE_GTP_SESSION 0
+#define ICE_GTP_DOWNLINK ICE_GTP_PDU
+#define ICE_GTP_UPLINK (ICE_GTP_PDU | ICE_GTP_PDU_LINK)
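+
+/* Resulting flag/mask combinations (informational), as consumed by the
+ * ice_ptype_attributes[] table in ice_flex_pipe.c (attribute: flags / mask):
+ *
+ *	PDU_EH:   ICE_GTP_PDU                    / ICE_GTP_PDU_FLAG_MASK
+ *	SESSION:  0                              / ICE_GTP_FLAGS_MASK
+ *	DOWNLINK: ICE_GTP_PDU                    / ICE_GTP_FLAGS_MASK
+ *	UPLINK:   ICE_GTP_PDU | ICE_GTP_PDU_LINK / ICE_GTP_FLAGS_MASK
+ */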
+
+struct ice_ptype_attributes {
+ u16 ptype;
+ enum ice_ptype_attrib_type attrib;
+};
+
+/* package labels */
+struct ice_label {
+ __le16 value;
+#define ICE_PKG_LABEL_SIZE 64
+ char name[ICE_PKG_LABEL_SIZE];
+};
+
+struct ice_label_section {
+ __le16 count;
+ struct ice_label label[];
+};
+
+#define ICE_MAX_LABELS_IN_BUF ICE_MAX_ENTRIES_IN_BUF( \
+ struct_size((struct ice_label_section *)0, label, 1) - \
+ sizeof(struct ice_label), sizeof(struct ice_label))
+
+struct ice_sw_fv_section {
+ __le16 count;
+ __le16 base_offset;
+ struct ice_fv fv[];
+};
+
+struct ice_sw_fv_list_entry {
+ struct list_head list_entry;
+ u32 profile_id;
+ struct ice_fv *fv_ptr;
+};
+
+/* The BOOST TCAM stores the match packet header in reverse order, meaning
+ * the fields are reversed; in addition, this means that the normally big endian
+ * fields of the packet are now little endian.
+ */
+struct ice_boost_key_value {
+#define ICE_BOOST_REMAINING_HV_KEY 15
+ u8 remaining_hv_key[ICE_BOOST_REMAINING_HV_KEY];
+ __le16 hv_dst_port_key;
+ __le16 hv_src_port_key;
+ u8 tcam_search_key;
+} __packed;
+
+struct ice_boost_key {
+ struct ice_boost_key_value key;
+ struct ice_boost_key_value key2;
+};
+
+/* package Boost TCAM entry */
+struct ice_boost_tcam_entry {
+ __le16 addr;
+ __le16 reserved;
+ /* break up the 40 bytes of key into different fields */
+ struct ice_boost_key key;
+ u8 boost_hit_index_group;
+ /* The following contains bitfields which are not on byte boundaries.
+ * These fields are currently unused by driver software.
+ */
+#define ICE_BOOST_BIT_FIELDS 43
+ u8 bit_fields[ICE_BOOST_BIT_FIELDS];
+};
+
+struct ice_boost_tcam_section {
+ __le16 count;
+ __le16 reserved;
+ struct ice_boost_tcam_entry tcam[];
+};
+
+#define ICE_MAX_BST_TCAMS_IN_BUF ICE_MAX_ENTRIES_IN_BUF( \
+ struct_size((struct ice_boost_tcam_section *)0, tcam, 1) - \
+ sizeof(struct ice_boost_tcam_entry), \
+ sizeof(struct ice_boost_tcam_entry))
+
+/* package Marker Ptype TCAM entry */
+struct ice_marker_ptype_tcam_entry {
+#define ICE_MARKER_PTYPE_TCAM_ADDR_MAX 1024
+ __le16 addr;
+ __le16 ptype;
+ u8 keys[20];
+};
+
+struct ice_marker_ptype_tcam_section {
+ __le16 count;
+ __le16 reserved;
+ struct ice_marker_ptype_tcam_entry tcam[];
+};
+
+#define ICE_MAX_MARKER_PTYPE_TCAMS_IN_BUF \
+ ICE_MAX_ENTRIES_IN_BUF(struct_size((struct ice_marker_ptype_tcam_section *)0, tcam, 1) - \
+ sizeof(struct ice_marker_ptype_tcam_entry), \
+ sizeof(struct ice_marker_ptype_tcam_entry))
+
+struct ice_xlt1_section {
+ __le16 count;
+ __le16 offset;
+ u8 value[];
+};
+
+struct ice_xlt2_section {
+ __le16 count;
+ __le16 offset;
+ __le16 value[];
+};
+
+struct ice_prof_redir_section {
+ __le16 count;
+ __le16 offset;
+ u8 redir_value[];
+};
+
+/* package buffer building */
+
+struct ice_buf_build {
+ struct ice_buf buf;
+ u16 reserved_section_table_entries;
+};
+
+struct ice_pkg_enum {
+ struct ice_buf_table *buf_table;
+ u32 buf_idx;
+
+ u32 type;
+ struct ice_buf_hdr *buf;
+ u32 sect_idx;
+ void *sect;
+ u32 sect_type;
+
+ u32 entry_idx;
+ void *(*handler)(u32 sect_type, void *section, u32 index, u32 *offset);
+};
+
+/* Tunnel enabling */
+
+enum ice_tunnel_type {
+ TNL_VXLAN = 0,
+ TNL_GENEVE,
+ TNL_GRETAP,
+ TNL_GTPC,
+ TNL_GTPU,
+ __TNL_TYPE_CNT,
+ TNL_LAST = 0xFF,
+ TNL_ALL = 0xFF,
+};
+
+struct ice_tunnel_type_scan {
+ enum ice_tunnel_type type;
+ const char *label_prefix;
+};
+
+struct ice_tunnel_entry {
+ enum ice_tunnel_type type;
+ u16 boost_addr;
+ u16 port;
+ struct ice_boost_tcam_entry *boost_entry;
+ u8 valid;
+};
+
+#define ICE_TUNNEL_MAX_ENTRIES 16
+
+struct ice_tunnel_table {
+ struct ice_tunnel_entry tbl[ICE_TUNNEL_MAX_ENTRIES];
+ u16 count;
+ u16 valid_count[__TNL_TYPE_CNT];
+};
+
+struct ice_dvm_entry {
+ u16 boost_addr;
+ u16 enable;
+ struct ice_boost_tcam_entry *boost_entry;
+};
+
+#define ICE_DVM_MAX_ENTRIES 48
+
+struct ice_dvm_table {
+ struct ice_dvm_entry tbl[ICE_DVM_MAX_ENTRIES];
+ u16 count;
+};
+
+struct ice_pkg_es {
+ __le16 count;
+ __le16 offset;
+ struct ice_fv_word es[];
+};
+
+struct ice_es {
+ u32 sid;
+ u16 count;
+ u16 fvw;
+ u16 *ref_count;
+ u32 *mask_ena;
+ struct list_head prof_map;
+ struct ice_fv_word *t;
+ struct mutex prof_map_lock; /* protect access to profiles list */
+ u8 *written;
+ u8 reverse; /* set to true to reverse FV order */
+};
+
+/* PTYPE Group management */
+
+/* Note: the XLT1 table takes a 13-bit packet type (PTYPE) as input and
+ * produces an 8-bit packet type group (PTG) ID as output.
+ *
+ * Note: PTG 0 is the default packet type group; all PTYPEs are assumed to
+ * be part of this group until moved to a new PTG.
+ */
+#define ICE_DEFAULT_PTG 0
+
+struct ice_ptg_entry {
+ struct ice_ptg_ptype *first_ptype;
+ u8 in_use;
+};
+
+struct ice_ptg_ptype {
+ struct ice_ptg_ptype *next_ptype;
+ u8 ptg;
+};
+
+#define ICE_MAX_TCAM_PER_PROFILE 32
+#define ICE_MAX_PTG_PER_PROFILE 32
+
+struct ice_prof_map {
+ struct list_head list;
+ u64 profile_cookie;
+ u64 context;
+ u8 prof_id;
+ u8 ptg_cnt;
+ u8 ptg[ICE_MAX_PTG_PER_PROFILE];
+ struct ice_ptype_attrib_info attr[ICE_MAX_PTG_PER_PROFILE];
+};
+
+#define ICE_INVALID_TCAM 0xFFFF
+
+struct ice_tcam_inf {
+ u16 tcam_idx;
+ struct ice_ptype_attrib_info attr;
+ u8 ptg;
+ u8 prof_id;
+ u8 in_use;
+};
+
+struct ice_vsig_prof {
+ struct list_head list;
+ u64 profile_cookie;
+ u8 prof_id;
+ u8 tcam_count;
+ struct ice_tcam_inf tcam[ICE_MAX_TCAM_PER_PROFILE];
+};
+
+struct ice_vsig_entry {
+ struct list_head prop_lst;
+ struct ice_vsig_vsi *first_vsi;
+ u8 in_use;
+};
+
+struct ice_vsig_vsi {
+ struct ice_vsig_vsi *next_vsi;
+ u32 prop_mask;
+ u16 changed;
+ u16 vsig;
+};
+
+#define ICE_XLT1_CNT 1024
+#define ICE_MAX_PTGS 256
+
+/* XLT1 Table */
+struct ice_xlt1 {
+ struct ice_ptg_entry *ptg_tbl;
+ struct ice_ptg_ptype *ptypes;
+ u8 *t;
+ u32 sid;
+ u16 count;
+};
+
+#define ICE_XLT2_CNT 768
+#define ICE_MAX_VSIGS 768
+
+/* VSIG bit layout:
+ * [0:12]: incremental VSIG index 1 to ICE_MAX_VSIGS
+ * [13:15]: PF number of device
+ */
+#define ICE_VSIG_IDX_M (0x1FFF)
+#define ICE_PF_NUM_S 13
+#define ICE_PF_NUM_M (0x07 << ICE_PF_NUM_S)
+#define ICE_VSIG_VALUE(vsig, pf_id) \
+ ((u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \
+ (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M)))
+#define ICE_DEFAULT_VSIG 0
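+
+/* For example, ICE_VSIG_VALUE(10, 2) yields 0x400A: VSIG index 10 in
+ * bits [0:12] and PF number 2 in bits [13:15].
+ */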
+
+/* XLT2 Table */
+struct ice_xlt2 {
+ struct ice_vsig_entry *vsig_tbl;
+ struct ice_vsig_vsi *vsis;
+ u16 *t;
+ u32 sid;
+ u16 count;
+};
+
+/* Profile ID Management */
+struct ice_prof_id_key {
+ __le16 flags;
+ u8 xlt1;
+ __le16 xlt2_cdid;
+} __packed;
+
+/* Keys are made up of two values, each one-half the size of the key.
+ * For TCAM, the entire key is 80 bits wide (two 40-bit values).
+ */
+#define ICE_TCAM_KEY_VAL_SZ 5
+#define ICE_TCAM_KEY_SZ (2 * ICE_TCAM_KEY_VAL_SZ)
+
+struct ice_prof_tcam_entry {
+ __le16 addr;
+ u8 key[ICE_TCAM_KEY_SZ];
+ u8 prof_id;
+} __packed;
+
+struct ice_prof_id_section {
+ __le16 count;
+ struct ice_prof_tcam_entry entry[];
+};
+
+struct ice_prof_tcam {
+ u32 sid;
+ u16 count;
+ u16 max_prof_id;
+ struct ice_prof_tcam_entry *t;
+ u8 cdid_bits; /* # CDID bits to use in key, 0, 2, 4, or 8 */
+};
+
+struct ice_prof_redir {
+ u8 *t;
+ u32 sid;
+ u16 count;
+};
+
+struct ice_mask {
+ u16 mask; /* 16-bit mask */
+ u16 idx; /* index */
+ u16 ref; /* reference count */
+ u8 in_use; /* non-zero if used */
+};
+
+struct ice_masks {
+ struct mutex lock; /* lock to protect this structure */
+ u16 first; /* first mask owned by the PF */
+ u16 count; /* number of masks owned by the PF */
+#define ICE_PROF_MASK_COUNT 32
+ struct ice_mask masks[ICE_PROF_MASK_COUNT];
+};
+
+/* Tables per block */
+struct ice_blk_info {
+ struct ice_xlt1 xlt1;
+ struct ice_xlt2 xlt2;
+ struct ice_prof_tcam prof;
+ struct ice_prof_redir prof_redir;
+ struct ice_es es;
+ struct ice_masks masks;
+ u8 overwrite; /* set to true to allow overwrite of table entries */
+ u8 is_list_init;
+};
+
+enum ice_chg_type {
+ ICE_TCAM_NONE = 0,
+ ICE_PTG_ES_ADD,
+ ICE_TCAM_ADD,
+ ICE_VSIG_ADD,
+ ICE_VSIG_REM,
+ ICE_VSI_MOVE,
+};
+
+struct ice_chs_chg {
+ struct list_head list_entry;
+ enum ice_chg_type type;
+
+ u8 add_ptg;
+ u8 add_vsig;
+ u8 add_tcam_idx;
+ u8 add_prof;
+ u16 ptype;
+ u8 ptg;
+ u8 prof_id;
+ u16 vsi;
+ u16 vsig;
+ u16 orig_vsig;
+ u16 tcam_idx;
+ struct ice_ptype_attrib_info attr;
+};
+
+#define ICE_FLOW_PTYPE_MAX ICE_XLT1_CNT
+
+enum ice_prof_type {
+ ICE_PROF_NON_TUN = 0x1,
+ ICE_PROF_TUN_UDP = 0x2,
+ ICE_PROF_TUN_GRE = 0x4,
+ ICE_PROF_TUN_GTPU = 0x8,
+ ICE_PROF_TUN_GTPC = 0x10,
+ ICE_PROF_TUN_ALL = 0x1E,
+ ICE_PROF_ALL = 0xFF,
+};
+
+/* Number of bits in a meta init entry. Note: this must be a multiple of
+ * 32 bits.
+ */
+#define ICE_META_INIT_BITS 192
+#define ICE_META_INIT_DW_CNT (ICE_META_INIT_BITS / (sizeof(__le32) * \
+ BITS_PER_BYTE))
+
+/* The meta init Flag field starts at this bit */
+#define ICE_META_FLAGS_ST 123
+
+/* The entry and bit to check for Double VLAN Mode (DVM) support */
+#define ICE_META_VLAN_MODE_ENTRY 0
+#define ICE_META_FLAG_VLAN_MODE 60
+#define ICE_META_VLAN_MODE_BIT (ICE_META_FLAGS_ST + \
+ ICE_META_FLAG_VLAN_MODE)
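+
+/* With the flags field starting at bit 123 and the VLAN mode flag at
+ * offset 60, the DVM check lands on bit 183 of entry 0, i.e. bit 23 of
+ * bm[5] after byte-order conversion.
+ */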
+
+struct ice_meta_init_entry {
+ __le32 bm[ICE_META_INIT_DW_CNT];
+};
+
+struct ice_meta_init_section {
+ __le16 count;
+ __le16 offset;
+ struct ice_meta_init_entry entry;
+};
+#endif /* _ICE_FLEX_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
new file mode 100644
index 000000000..ef103e47a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -0,0 +1,2474 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_flow.h"
+#include <net/gre.h>
+
+/* Describe properties of a protocol header field */
+struct ice_flow_field_info {
+ enum ice_flow_seg_hdr hdr;
+ s16 off; /* Offset from start of a protocol header, in bits */
+	u16 size; /* Size of the field in bits */
+ u16 mask; /* 16-bit mask for field */
+};
+
+#define ICE_FLOW_FLD_INFO(_hdr, _offset_bytes, _size_bytes) { \
+ .hdr = _hdr, \
+ .off = (_offset_bytes) * BITS_PER_BYTE, \
+ .size = (_size_bytes) * BITS_PER_BYTE, \
+ .mask = 0, \
+}
+
+#define ICE_FLOW_FLD_INFO_MSK(_hdr, _offset_bytes, _size_bytes, _mask) { \
+ .hdr = _hdr, \
+ .off = (_offset_bytes) * BITS_PER_BYTE, \
+ .size = (_size_bytes) * BITS_PER_BYTE, \
+ .mask = _mask, \
+}
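+
+/* For example, the TCP destination port entry below,
+ * ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 2, sizeof(__be16)), expands to
+ * a field at bit offset 16 (byte 2) from the start of the TCP header,
+ * 16 bits wide, with no mask.
+ */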
+
+/* Table containing properties of supported protocol header fields */
+static const
+struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
+ /* Ether */
+ /* ICE_FLOW_FIELD_IDX_ETH_DA */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, ETH_ALEN),
+ /* ICE_FLOW_FIELD_IDX_ETH_SA */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, ETH_ALEN, ETH_ALEN),
+ /* ICE_FLOW_FIELD_IDX_S_VLAN */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_VLAN, 12, sizeof(__be16)),
+ /* ICE_FLOW_FIELD_IDX_C_VLAN */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_VLAN, 14, sizeof(__be16)),
+ /* ICE_FLOW_FIELD_IDX_ETH_TYPE */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, sizeof(__be16)),
+ /* IPv4 / IPv6 */
+ /* ICE_FLOW_FIELD_IDX_IPV4_DSCP */
+ ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_IPV4, 0, 1, 0x00fc),
+ /* ICE_FLOW_FIELD_IDX_IPV6_DSCP */
+ ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_IPV6, 0, 1, 0x0ff0),
+ /* ICE_FLOW_FIELD_IDX_IPV4_TTL */
+ ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 8, 1, 0xff00),
+ /* ICE_FLOW_FIELD_IDX_IPV4_PROT */
+ ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 8, 1, 0x00ff),
+ /* ICE_FLOW_FIELD_IDX_IPV6_TTL */
+ ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 6, 1, 0x00ff),
+ /* ICE_FLOW_FIELD_IDX_IPV6_PROT */
+ ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 6, 1, 0xff00),
+ /* ICE_FLOW_FIELD_IDX_IPV4_SA */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 12, sizeof(struct in_addr)),
+ /* ICE_FLOW_FIELD_IDX_IPV4_DA */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 16, sizeof(struct in_addr)),
+ /* ICE_FLOW_FIELD_IDX_IPV6_SA */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, sizeof(struct in6_addr)),
+ /* ICE_FLOW_FIELD_IDX_IPV6_DA */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, sizeof(struct in6_addr)),
+ /* Transport */
+ /* ICE_FLOW_FIELD_IDX_TCP_SRC_PORT */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 0, sizeof(__be16)),
+ /* ICE_FLOW_FIELD_IDX_TCP_DST_PORT */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 2, sizeof(__be16)),
+ /* ICE_FLOW_FIELD_IDX_UDP_SRC_PORT */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 0, sizeof(__be16)),
+ /* ICE_FLOW_FIELD_IDX_UDP_DST_PORT */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 2, sizeof(__be16)),
+ /* ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 0, sizeof(__be16)),
+ /* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)),
+ /* ICE_FLOW_FIELD_IDX_TCP_FLAGS */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 13, 1),
+ /* ARP */
+ /* ICE_FLOW_FIELD_IDX_ARP_SIP */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 14, sizeof(struct in_addr)),
+ /* ICE_FLOW_FIELD_IDX_ARP_DIP */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 24, sizeof(struct in_addr)),
+ /* ICE_FLOW_FIELD_IDX_ARP_SHA */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 8, ETH_ALEN),
+ /* ICE_FLOW_FIELD_IDX_ARP_DHA */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 18, ETH_ALEN),
+ /* ICE_FLOW_FIELD_IDX_ARP_OP */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 6, sizeof(__be16)),
+ /* ICMP */
+ /* ICE_FLOW_FIELD_IDX_ICMP_TYPE */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ICMP, 0, 1),
+ /* ICE_FLOW_FIELD_IDX_ICMP_CODE */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ICMP, 1, 1),
+ /* GRE */
+ /* ICE_FLOW_FIELD_IDX_GRE_KEYID */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GRE, 12,
+ sizeof_field(struct gre_full_hdr, key)),
+ /* GTP */
+ /* ICE_FLOW_FIELD_IDX_GTPC_TEID */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPC_TEID, 12, sizeof(__be32)),
+ /* ICE_FLOW_FIELD_IDX_GTPU_IP_TEID */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_IP, 12, sizeof(__be32)),
+ /* ICE_FLOW_FIELD_IDX_GTPU_EH_TEID */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_EH, 12, sizeof(__be32)),
+ /* ICE_FLOW_FIELD_IDX_GTPU_EH_QFI */
+ ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_EH, 22, sizeof(__be16),
+ 0x3f00),
+ /* ICE_FLOW_FIELD_IDX_GTPU_UP_TEID */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_UP, 12, sizeof(__be32)),
+ /* ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_DWN, 12, sizeof(__be32)),
+ /* PPPoE */
+ /* ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PPPOE, 2, sizeof(__be16)),
+ /* PFCP */
+ /* ICE_FLOW_FIELD_IDX_PFCP_SEID */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PFCP_SESSION, 12, sizeof(__be64)),
+ /* L2TPv3 */
+ /* ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_L2TPV3, 0, sizeof(__be32)),
+ /* ESP */
+ /* ICE_FLOW_FIELD_IDX_ESP_SPI */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ESP, 0, sizeof(__be32)),
+ /* AH */
+ /* ICE_FLOW_FIELD_IDX_AH_SPI */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_AH, 4, sizeof(__be32)),
+ /* NAT_T_ESP */
+ /* ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI */
+ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_NAT_T_ESP, 8, sizeof(__be32)),
+};
+
+/* Bitmaps indicating relevant packet types for a particular protocol header.
+ * Each bitmap is indexed by PTYPE number: bit (32 * w + b) of 32-bit word w
+ * covers PTYPE (32 * w + b).
+ *
+ * Packet types for packets with an Outer/First/Single MAC header
+ */
+static const u32 ice_ptypes_mac_ofos[] = {
+ 0xFDC00846, 0xBFBF7F7E, 0xF70001DF, 0xFEFDFDFB,
+ 0x0000077E, 0x00000000, 0x00000000, 0x00000000,
+ 0x00400000, 0x03FFF000, 0x7FFFFFE0, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last MAC VLAN header */
+static const u32 ice_ptypes_macvlan_il[] = {
+ 0x00000000, 0xBC000000, 0x000001DF, 0xF0000000,
+ 0x0000077E, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header, does NOT
+ * include IPv4 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv4_ofos[] = {
+ 0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000155, 0x00000000, 0x00000000,
+ 0x00000000, 0x000FC000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header, includes
+ * IPv4 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv4_ofos_all[] = {
+ 0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000155, 0x00000000, 0x00000000,
+ 0x00000000, 0x000FC000, 0x83E0F800, 0x00000101,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv4 header */
+static const u32 ice_ptypes_ipv4_il[] = {
+ 0xE0000000, 0xB807700E, 0x80000003, 0xE01DC03B,
+ 0x0000000E, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x001FF800, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv6 header, does NOT
+ * include IPv6 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv6_ofos[] = {
+ 0x00000000, 0x00000000, 0x77000000, 0x10002000,
+ 0x00000000, 0x000002AA, 0x00000000, 0x00000000,
+ 0x00000000, 0x03F00000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv6 header, includes
+ * IPv6 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv6_ofos_all[] = {
+ 0x00000000, 0x00000000, 0x77000000, 0x10002000,
+ 0x00000000, 0x000002AA, 0x00000000, 0x00000000,
+ 0x00080F00, 0x03F00000, 0x7C1F0000, 0x00000206,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv6 header */
+static const u32 ice_ptypes_ipv6_il[] = {
+ 0x00000000, 0x03B80770, 0x000001DC, 0x0EE00000,
+ 0x00000770, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x7FE00000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header - no L4 */
+static const u32 ice_ptypes_ipv4_ofos_no_l4[] = {
+ 0x10C00000, 0x04000800, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outermost/First ARP header */
+static const u32 ice_ptypes_arp_of[] = {
+ 0x00000800, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
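+
+/* As an illustration of the bitmap encoding: ice_ptypes_arp_of above has
+ * only bit 11 of word 0 set (0x00000800), i.e. exactly one relevant
+ * PTYPE, number 11.
+ */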
+
+/* Packet types for packets with an Innermost/Last IPv4 header - no L4 */
+static const u32 ice_ptypes_ipv4_il_no_l4[] = {
+ 0x60000000, 0x18043008, 0x80000002, 0x6010c021,
+ 0x00000008, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv6 header - no L4 */
+static const u32 ice_ptypes_ipv6_ofos_no_l4[] = {
+ 0x00000000, 0x00000000, 0x43000000, 0x10002000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv6 header - no L4 */
+static const u32 ice_ptypes_ipv6_il_no_l4[] = {
+ 0x00000000, 0x02180430, 0x0000010c, 0x086010c0,
+ 0x00000430, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* UDP Packet types for non-tunneled packets or tunneled
+ * packets with inner UDP.
+ */
+static const u32 ice_ptypes_udp_il[] = {
+ 0x81000000, 0x20204040, 0x04000010, 0x80810102,
+ 0x00000040, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00410000, 0x90842000, 0x00000007,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last TCP header */
+static const u32 ice_ptypes_tcp_il[] = {
+ 0x04000000, 0x80810102, 0x10000040, 0x02040408,
+ 0x00000102, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00820000, 0x21084000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last SCTP header */
+static const u32 ice_ptypes_sctp_il[] = {
+ 0x08000000, 0x01020204, 0x20000081, 0x04080810,
+ 0x00000204, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x01040000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outermost/First ICMP header */
+static const u32 ice_ptypes_icmp_of[] = {
+ 0x10000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last ICMP header */
+static const u32 ice_ptypes_icmp_il[] = {
+ 0x00000000, 0x02040408, 0x40000102, 0x08101020,
+ 0x00000408, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x42108000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outermost/First GRE header */
+static const u32 ice_ptypes_gre_of[] = {
+ 0x00000000, 0xBFBF7800, 0x000001DF, 0xFEFDE000,
+ 0x0000017E, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last MAC header */
+static const u32 ice_ptypes_mac_il[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for GTPC */
+static const u32 ice_ptypes_gtpc[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000180, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for GTPC with TEID */
+static const u32 ice_ptypes_gtpc_tid[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000060, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for GTPU */
+static const struct ice_ptype_attributes ice_attr_gtpu_eh[] = {
+ { ICE_MAC_IPV4_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV4_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV4_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV4_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV6_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV6_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV6_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV6_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV4_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV4_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV4_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV4_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV6_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV6_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV6_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_PDU_EH },
+ { ICE_MAC_IPV6_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_PDU_EH },
+};
+
+static const struct ice_ptype_attributes ice_attr_gtpu_down[] = {
+ { ICE_MAC_IPV4_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV4_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV4_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV4_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV6_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV6_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV6_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV6_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV4_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV4_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV4_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV4_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV6_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV6_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV6_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+ { ICE_MAC_IPV6_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+};
+
+static const struct ice_ptype_attributes ice_attr_gtpu_up[] = {
+ { ICE_MAC_IPV4_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV4_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV4_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV4_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV6_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV6_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV6_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV6_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV4_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV4_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV4_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV4_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV6_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV6_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV6_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_UPLINK },
+ { ICE_MAC_IPV6_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_UPLINK },
+};
+
+static const u32 ice_ptypes_gtpu[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x7FFFFE00, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for PPPoE */
+static const u32 ice_ptypes_pppoe[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x03ffe000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with PFCP NODE header */
+static const u32 ice_ptypes_pfcp_node[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x80000000, 0x00000002,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with PFCP SESSION header */
+static const u32 ice_ptypes_pfcp_session[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000005,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for L2TPv3 */
+static const u32 ice_ptypes_l2tpv3[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000300,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for ESP */
+static const u32 ice_ptypes_esp[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000003, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for AH */
+static const u32 ice_ptypes_ah[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x0000000C, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with NAT_T ESP header */
+static const u32 ice_ptypes_nat_t_esp[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000030, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+static const u32 ice_ptypes_mac_non_ip_ofos[] = {
+ 0x00000846, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00400000, 0x03FFF000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Parameters and info used during the creation of a flow profile */
+struct ice_flow_prof_params {
+ enum ice_block blk;
+ u16 entry_length; /* # of bytes formatted entry will require */
+ u8 es_cnt;
+ struct ice_flow_prof *prof;
+
+ /* For ACL, the es[0] will have the data of ICE_RX_MDID_PKT_FLAGS_15_0
+ * This will give us the direction flags.
+ */
+ struct ice_fv_word es[ICE_MAX_FV_WORDS];
+ /* attributes can be used to add attributes to a particular PTYPE */
+ const struct ice_ptype_attributes *attr;
+ u16 attr_cnt;
+
+ u16 mask[ICE_MAX_FV_WORDS];
+ DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX);
+};
+
+#define ICE_FLOW_RSS_HDRS_INNER_MASK \
+ (ICE_FLOW_SEG_HDR_PPPOE | ICE_FLOW_SEG_HDR_GTPC | \
+ ICE_FLOW_SEG_HDR_GTPC_TEID | ICE_FLOW_SEG_HDR_GTPU | \
+ ICE_FLOW_SEG_HDR_PFCP_SESSION | ICE_FLOW_SEG_HDR_L2TPV3 | \
+ ICE_FLOW_SEG_HDR_ESP | ICE_FLOW_SEG_HDR_AH | \
+ ICE_FLOW_SEG_HDR_NAT_T_ESP)
+
+#define ICE_FLOW_SEG_HDRS_L3_MASK \
+ (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_ARP)
+#define ICE_FLOW_SEG_HDRS_L4_MASK \
+ (ICE_FLOW_SEG_HDR_ICMP | ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | \
+ ICE_FLOW_SEG_HDR_SCTP)
+/* mask for L4 protocols that are NOT part of IPv4/6 OTHER PTYPE groups */
+#define ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER \
+ (ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
+
+/**
+ * ice_flow_val_hdrs - check packet segments for valid protocol headers
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ */
+static int ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt)
+{
+ u8 i;
+
+ for (i = 0; i < segs_cnt; i++) {
+ /* Multiple L3 headers */
+ if (segs[i].hdrs & ICE_FLOW_SEG_HDRS_L3_MASK &&
+ !is_power_of_2(segs[i].hdrs & ICE_FLOW_SEG_HDRS_L3_MASK))
+ return -EINVAL;
+
+ /* Multiple L4 headers */
+ if (segs[i].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK &&
+ !is_power_of_2(segs[i].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK))
+ return -EINVAL;
+ }
+
+ return 0;
+}
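+
+/* For example, a segment with hdrs = ICE_FLOW_SEG_HDR_IPV4 |
+ * ICE_FLOW_SEG_HDR_IPV6 fails the first check (two L3 bits set is not a
+ * power of two), while ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP
+ * passes both checks.
+ */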
+
+/* Sizes of fixed known protocol headers without header options */
+#define ICE_FLOW_PROT_HDR_SZ_MAC 14
+#define ICE_FLOW_PROT_HDR_SZ_MAC_VLAN (ICE_FLOW_PROT_HDR_SZ_MAC + 2)
+#define ICE_FLOW_PROT_HDR_SZ_IPV4 20
+#define ICE_FLOW_PROT_HDR_SZ_IPV6 40
+#define ICE_FLOW_PROT_HDR_SZ_ARP 28
+#define ICE_FLOW_PROT_HDR_SZ_ICMP 8
+#define ICE_FLOW_PROT_HDR_SZ_TCP 20
+#define ICE_FLOW_PROT_HDR_SZ_UDP 8
+#define ICE_FLOW_PROT_HDR_SZ_SCTP 12
+
+/**
+ * ice_flow_calc_seg_sz - calculates size of a packet segment based on headers
+ * @params: information about the flow to be processed
+ * @seg: index of packet segment whose header size is to be determined
+ */
+static u16 ice_flow_calc_seg_sz(struct ice_flow_prof_params *params, u8 seg)
+{
+ u16 sz;
+
+ /* L2 headers */
+ sz = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_VLAN) ?
+ ICE_FLOW_PROT_HDR_SZ_MAC_VLAN : ICE_FLOW_PROT_HDR_SZ_MAC;
+
+ /* L3 headers */
+ if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4)
+ sz += ICE_FLOW_PROT_HDR_SZ_IPV4;
+ else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV6)
+ sz += ICE_FLOW_PROT_HDR_SZ_IPV6;
+ else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_ARP)
+ sz += ICE_FLOW_PROT_HDR_SZ_ARP;
+ else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)
+ /* An L3 header is required if L4 is specified */
+ return 0;
+
+ /* L4 headers */
+ if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_ICMP)
+ sz += ICE_FLOW_PROT_HDR_SZ_ICMP;
+ else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_TCP)
+ sz += ICE_FLOW_PROT_HDR_SZ_TCP;
+ else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_UDP)
+ sz += ICE_FLOW_PROT_HDR_SZ_UDP;
+ else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_SCTP)
+ sz += ICE_FLOW_PROT_HDR_SZ_SCTP;
+
+ return sz;
+}
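+
+/* For instance, a segment with MAC, IPv4 and TCP headers sizes out to
+ * 14 + 20 + 20 = 54 bytes, while a segment specifying an L4 header
+ * without any L3 header returns 0 to flag the invalid combination.
+ */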
+
+/**
+ * ice_flow_proc_seg_hdrs - process protocol headers present in pkt segments
+ * @params: information about the flow to be processed
+ *
+ * This function identifies the packet types associated with the protocol
+ * headers present in the packet segments of the specified flow profile.
+ */
+static int ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
+{
+ struct ice_flow_prof *prof;
+ u8 i;
+
+ memset(params->ptypes, 0xff, sizeof(params->ptypes));
+
+ prof = params->prof;
+
+ for (i = 0; i < params->prof->segs_cnt; i++) {
+ const unsigned long *src;
+ u32 hdrs;
+
+ hdrs = prof->segs[i].hdrs;
+
+ if (hdrs & ICE_FLOW_SEG_HDR_ETH) {
+ src = !i ? (const unsigned long *)ice_ptypes_mac_ofos :
+ (const unsigned long *)ice_ptypes_mac_il;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ }
+
+ if (i && hdrs & ICE_FLOW_SEG_HDR_VLAN) {
+ src = (const unsigned long *)ice_ptypes_macvlan_il;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ }
+
+ if (!i && hdrs & ICE_FLOW_SEG_HDR_ARP) {
+ bitmap_and(params->ptypes, params->ptypes,
+ (const unsigned long *)ice_ptypes_arp_of,
+ ICE_FLOW_PTYPE_MAX);
+ }
+
+ if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
+ (hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER)) {
+ src = i ? (const unsigned long *)ice_ptypes_ipv4_il :
+ (const unsigned long *)ice_ptypes_ipv4_ofos_all;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) &&
+ (hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER)) {
+ src = i ? (const unsigned long *)ice_ptypes_ipv6_il :
+ (const unsigned long *)ice_ptypes_ipv6_ofos_all;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
+ !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) {
+ src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos_no_l4 :
+ (const unsigned long *)ice_ptypes_ipv4_il_no_l4;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_IPV4) {
+ src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos :
+ (const unsigned long *)ice_ptypes_ipv4_il;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) &&
+ !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) {
+ src = !i ? (const unsigned long *)ice_ptypes_ipv6_ofos_no_l4 :
+ (const unsigned long *)ice_ptypes_ipv6_il_no_l4;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_IPV6) {
+ src = !i ? (const unsigned long *)ice_ptypes_ipv6_ofos :
+ (const unsigned long *)ice_ptypes_ipv6_il;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ }
+
+ if (hdrs & ICE_FLOW_SEG_HDR_ETH_NON_IP) {
+ src = (const unsigned long *)ice_ptypes_mac_non_ip_ofos;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_PPPOE) {
+ src = (const unsigned long *)ice_ptypes_pppoe;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else {
+ src = (const unsigned long *)ice_ptypes_pppoe;
+ bitmap_andnot(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ }
+
+ if (hdrs & ICE_FLOW_SEG_HDR_UDP) {
+ src = (const unsigned long *)ice_ptypes_udp_il;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_TCP) {
+ bitmap_and(params->ptypes, params->ptypes,
+ (const unsigned long *)ice_ptypes_tcp_il,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_SCTP) {
+ src = (const unsigned long *)ice_ptypes_sctp_il;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ }
+
+ if (hdrs & ICE_FLOW_SEG_HDR_ICMP) {
+ src = !i ? (const unsigned long *)ice_ptypes_icmp_of :
+ (const unsigned long *)ice_ptypes_icmp_il;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_GRE) {
+ if (!i) {
+ src = (const unsigned long *)ice_ptypes_gre_of;
+ bitmap_and(params->ptypes, params->ptypes,
+ src, ICE_FLOW_PTYPE_MAX);
+ }
+ } else if (hdrs & ICE_FLOW_SEG_HDR_GTPC) {
+ src = (const unsigned long *)ice_ptypes_gtpc;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_GTPC_TEID) {
+ src = (const unsigned long *)ice_ptypes_gtpc_tid;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_DWN) {
+ src = (const unsigned long *)ice_ptypes_gtpu;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+
+ /* Attributes for GTP packet with downlink */
+ params->attr = ice_attr_gtpu_down;
+ params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_down);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_UP) {
+ src = (const unsigned long *)ice_ptypes_gtpu;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+
+ /* Attributes for GTP packet with uplink */
+ params->attr = ice_attr_gtpu_up;
+ params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_up);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_EH) {
+ src = (const unsigned long *)ice_ptypes_gtpu;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+
+ /* Attributes for GTP packet with Extension Header */
+ params->attr = ice_attr_gtpu_eh;
+ params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_eh);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_IP) {
+ src = (const unsigned long *)ice_ptypes_gtpu;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_L2TPV3) {
+ src = (const unsigned long *)ice_ptypes_l2tpv3;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_ESP) {
+ src = (const unsigned long *)ice_ptypes_esp;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_AH) {
+ src = (const unsigned long *)ice_ptypes_ah;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else if (hdrs & ICE_FLOW_SEG_HDR_NAT_T_ESP) {
+ src = (const unsigned long *)ice_ptypes_nat_t_esp;
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ }
+
+ if (hdrs & ICE_FLOW_SEG_HDR_PFCP) {
+ if (hdrs & ICE_FLOW_SEG_HDR_PFCP_NODE)
+ src = (const unsigned long *)ice_ptypes_pfcp_node;
+ else
+ src = (const unsigned long *)ice_ptypes_pfcp_session;
+
+ bitmap_and(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ } else {
+ src = (const unsigned long *)ice_ptypes_pfcp_node;
+ bitmap_andnot(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+
+ src = (const unsigned long *)ice_ptypes_pfcp_session;
+ bitmap_andnot(params->ptypes, params->ptypes, src,
+ ICE_FLOW_PTYPE_MAX);
+ }
+ }
+
+ return 0;
+}
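+
+/* Note that each header class above ANDs (or AND-NOTs) its PTYPE bitmap
+ * into params->ptypes, so the final bitmap is the intersection of all
+ * constraints: starting from all ones, every matched header narrows the
+ * set of packet types the profile can apply to.
+ */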
+
+/**
+ * ice_flow_xtract_fld - Create an extraction sequence entry for the given field
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ * @seg: packet segment index of the field to be extracted
+ * @fld: ID of field to be extracted
+ * @match: bit field of all fields
+ *
+ * This function determines the protocol ID, offset, and size of the given
+ * field. It then allocates one or more extraction sequence entries for the
+ * given field, and fills the entries with protocol ID and offset information.
+ */
+static int
+ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
+ u8 seg, enum ice_flow_field fld, u64 match)
+{
+ enum ice_flow_field sib = ICE_FLOW_FIELD_IDX_MAX;
+ enum ice_prot_id prot_id = ICE_PROT_ID_INVAL;
+ u8 fv_words = hw->blk[params->blk].es.fvw;
+ struct ice_flow_fld_info *flds;
+ u16 cnt, ese_bits, i;
+ u16 sib_mask = 0;
+ u16 mask;
+ u16 off;
+
+ flds = params->prof->segs[seg].fields;
+
+ switch (fld) {
+ case ICE_FLOW_FIELD_IDX_ETH_DA:
+ case ICE_FLOW_FIELD_IDX_ETH_SA:
+ case ICE_FLOW_FIELD_IDX_S_VLAN:
+ case ICE_FLOW_FIELD_IDX_C_VLAN:
+ prot_id = seg == 0 ? ICE_PROT_MAC_OF_OR_S : ICE_PROT_MAC_IL;
+ break;
+ case ICE_FLOW_FIELD_IDX_ETH_TYPE:
+ prot_id = seg == 0 ? ICE_PROT_ETYPE_OL : ICE_PROT_ETYPE_IL;
+ break;
+ case ICE_FLOW_FIELD_IDX_IPV4_DSCP:
+ prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+ break;
+ case ICE_FLOW_FIELD_IDX_IPV6_DSCP:
+ prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+ break;
+ case ICE_FLOW_FIELD_IDX_IPV4_TTL:
+ case ICE_FLOW_FIELD_IDX_IPV4_PROT:
+ prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+
+		/* TTL and PROT share the same extraction sequence entry;
+		 * each is considered the other's sibling with respect to
+		 * that entry.
+		 */
+ if (fld == ICE_FLOW_FIELD_IDX_IPV4_TTL)
+ sib = ICE_FLOW_FIELD_IDX_IPV4_PROT;
+ else if (fld == ICE_FLOW_FIELD_IDX_IPV4_PROT)
+ sib = ICE_FLOW_FIELD_IDX_IPV4_TTL;
+
+ /* If the sibling field is also included, that field's
+ * mask needs to be included.
+ */
+ if (match & BIT(sib))
+ sib_mask = ice_flds_info[sib].mask;
+ break;
+ case ICE_FLOW_FIELD_IDX_IPV6_TTL:
+ case ICE_FLOW_FIELD_IDX_IPV6_PROT:
+ prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+
+		/* TTL and PROT share the same extraction sequence entry;
+		 * each is considered the other's sibling with respect to
+		 * that entry.
+		 */
+ if (fld == ICE_FLOW_FIELD_IDX_IPV6_TTL)
+ sib = ICE_FLOW_FIELD_IDX_IPV6_PROT;
+ else if (fld == ICE_FLOW_FIELD_IDX_IPV6_PROT)
+ sib = ICE_FLOW_FIELD_IDX_IPV6_TTL;
+
+ /* If the sibling field is also included, that field's
+ * mask needs to be included.
+ */
+ if (match & BIT(sib))
+ sib_mask = ice_flds_info[sib].mask;
+ break;
+ case ICE_FLOW_FIELD_IDX_IPV4_SA:
+ case ICE_FLOW_FIELD_IDX_IPV4_DA:
+ prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+ break;
+ case ICE_FLOW_FIELD_IDX_IPV6_SA:
+ case ICE_FLOW_FIELD_IDX_IPV6_DA:
+ prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+ break;
+ case ICE_FLOW_FIELD_IDX_TCP_SRC_PORT:
+ case ICE_FLOW_FIELD_IDX_TCP_DST_PORT:
+ case ICE_FLOW_FIELD_IDX_TCP_FLAGS:
+ prot_id = ICE_PROT_TCP_IL;
+ break;
+ case ICE_FLOW_FIELD_IDX_UDP_SRC_PORT:
+ case ICE_FLOW_FIELD_IDX_UDP_DST_PORT:
+ prot_id = ICE_PROT_UDP_IL_OR_S;
+ break;
+ case ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT:
+ case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT:
+ prot_id = ICE_PROT_SCTP_IL;
+ break;
+ case ICE_FLOW_FIELD_IDX_GTPC_TEID:
+ case ICE_FLOW_FIELD_IDX_GTPU_IP_TEID:
+ case ICE_FLOW_FIELD_IDX_GTPU_UP_TEID:
+ case ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID:
+ case ICE_FLOW_FIELD_IDX_GTPU_EH_TEID:
+ case ICE_FLOW_FIELD_IDX_GTPU_EH_QFI:
+ /* GTP is accessed through UDP OF protocol */
+ prot_id = ICE_PROT_UDP_OF;
+ break;
+ case ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID:
+ prot_id = ICE_PROT_PPPOE;
+ break;
+ case ICE_FLOW_FIELD_IDX_PFCP_SEID:
+ prot_id = ICE_PROT_UDP_IL_OR_S;
+ break;
+ case ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID:
+ prot_id = ICE_PROT_L2TPV3;
+ break;
+ case ICE_FLOW_FIELD_IDX_ESP_SPI:
+ prot_id = ICE_PROT_ESP_F;
+ break;
+ case ICE_FLOW_FIELD_IDX_AH_SPI:
+ prot_id = ICE_PROT_ESP_2;
+ break;
+ case ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI:
+ prot_id = ICE_PROT_UDP_IL_OR_S;
+ break;
+ case ICE_FLOW_FIELD_IDX_ARP_SIP:
+ case ICE_FLOW_FIELD_IDX_ARP_DIP:
+ case ICE_FLOW_FIELD_IDX_ARP_SHA:
+ case ICE_FLOW_FIELD_IDX_ARP_DHA:
+ case ICE_FLOW_FIELD_IDX_ARP_OP:
+ prot_id = ICE_PROT_ARP_OF;
+ break;
+ case ICE_FLOW_FIELD_IDX_ICMP_TYPE:
+ case ICE_FLOW_FIELD_IDX_ICMP_CODE:
+ /* ICMP type and code share the same extraction seq. entry */
+ prot_id = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4) ?
+ ICE_PROT_ICMP_IL : ICE_PROT_ICMPV6_IL;
+ sib = fld == ICE_FLOW_FIELD_IDX_ICMP_TYPE ?
+ ICE_FLOW_FIELD_IDX_ICMP_CODE :
+ ICE_FLOW_FIELD_IDX_ICMP_TYPE;
+ break;
+ case ICE_FLOW_FIELD_IDX_GRE_KEYID:
+ prot_id = ICE_PROT_GRE_OF;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+	/* Each extraction sequence entry is one word in size and extracts a
+	 * word's worth of data from a word-aligned offset within a protocol
+	 * header.
+	 */
+ ese_bits = ICE_FLOW_FV_EXTRACT_SZ * BITS_PER_BYTE;
+
+ flds[fld].xtrct.prot_id = prot_id;
+ flds[fld].xtrct.off = (ice_flds_info[fld].off / ese_bits) *
+ ICE_FLOW_FV_EXTRACT_SZ;
+ flds[fld].xtrct.disp = (u8)(ice_flds_info[fld].off % ese_bits);
+ flds[fld].xtrct.idx = params->es_cnt;
+ flds[fld].xtrct.mask = ice_flds_info[fld].mask;
+
+	/* Determine the number of extraction sequence entries this field
+	 * consumes; the next field's entry index follows them.
+	 */
+ cnt = DIV_ROUND_UP(flds[fld].xtrct.disp + ice_flds_info[fld].size,
+ ese_bits);
+
+ /* Fill in the extraction sequence entries needed for this field */
+ off = flds[fld].xtrct.off;
+ mask = flds[fld].xtrct.mask;
+ for (i = 0; i < cnt; i++) {
+ /* Only consume an extraction sequence entry if there is no
+ * sibling field associated with this field or the sibling entry
+ * already extracts the word shared with this field.
+ */
+ if (sib == ICE_FLOW_FIELD_IDX_MAX ||
+ flds[sib].xtrct.prot_id == ICE_PROT_ID_INVAL ||
+ flds[sib].xtrct.off != off) {
+ u8 idx;
+
+			/* Make sure the number of extraction sequence entries
+			 * required does not exceed the block's capability
+			 */
+ if (params->es_cnt >= fv_words)
+ return -ENOSPC;
+
+ /* some blocks require a reversed field vector layout */
+ if (hw->blk[params->blk].es.reverse)
+ idx = fv_words - params->es_cnt - 1;
+ else
+ idx = params->es_cnt;
+
+ params->es[idx].prot_id = prot_id;
+ params->es[idx].off = off;
+ params->mask[idx] = mask | sib_mask;
+ params->es_cnt++;
+ }
+
+ off += ICE_FLOW_FV_EXTRACT_SZ;
+ }
+
+ return 0;
+}
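+
+/* Worked example, assuming ICE_FLOW_FV_EXTRACT_SZ is 2 bytes (one word):
+ * ICE_FLOW_FIELD_IDX_IPV4_SA sits at bit offset 96 and is 32 bits wide,
+ * so disp = 96 % 16 = 0 and cnt = DIV_ROUND_UP(0 + 32, 16) = 2; the field
+ * consumes two extraction entries at protocol offsets 12 and 14.
+ */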
+
+/**
+ * ice_flow_xtract_raws - Create extract sequence entries for raw bytes
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ * @seg: index of packet segment whose raw fields are to be extracted
+ */
+static int
+ice_flow_xtract_raws(struct ice_hw *hw, struct ice_flow_prof_params *params,
+ u8 seg)
+{
+ u16 fv_words;
+ u16 hdrs_sz;
+ u8 i;
+
+ if (!params->prof->segs[seg].raws_cnt)
+ return 0;
+
+ if (params->prof->segs[seg].raws_cnt >
+ ARRAY_SIZE(params->prof->segs[seg].raws))
+ return -ENOSPC;
+
+ /* Offsets within the segment headers are not supported */
+ hdrs_sz = ice_flow_calc_seg_sz(params, seg);
+ if (!hdrs_sz)
+ return -EINVAL;
+
+ fv_words = hw->blk[params->blk].es.fvw;
+
+ for (i = 0; i < params->prof->segs[seg].raws_cnt; i++) {
+ struct ice_flow_seg_fld_raw *raw;
+ u16 off, cnt, j;
+
+ raw = &params->prof->segs[seg].raws[i];
+
+ /* Storing extraction information */
+ raw->info.xtrct.prot_id = ICE_PROT_MAC_OF_OR_S;
+ raw->info.xtrct.off = (raw->off / ICE_FLOW_FV_EXTRACT_SZ) *
+ ICE_FLOW_FV_EXTRACT_SZ;
+ raw->info.xtrct.disp = (raw->off % ICE_FLOW_FV_EXTRACT_SZ) *
+ BITS_PER_BYTE;
+ raw->info.xtrct.idx = params->es_cnt;
+
+ /* Determine the number of field vector entries this raw field
+ * consumes.
+ */
+ cnt = DIV_ROUND_UP(raw->info.xtrct.disp +
+ (raw->info.src.last * BITS_PER_BYTE),
+ (ICE_FLOW_FV_EXTRACT_SZ * BITS_PER_BYTE));
+ off = raw->info.xtrct.off;
+ for (j = 0; j < cnt; j++) {
+ u16 idx;
+
+			/* Make sure the number of extraction sequence entries
+			 * required does not exceed the block's capability
+			 */
+ if (params->es_cnt >= hw->blk[params->blk].es.count ||
+ params->es_cnt >= ICE_MAX_FV_WORDS)
+ return -ENOSPC;
+
+ /* some blocks require a reversed field vector layout */
+ if (hw->blk[params->blk].es.reverse)
+ idx = fv_words - params->es_cnt - 1;
+ else
+ idx = params->es_cnt;
+
+ params->es[idx].prot_id = raw->info.xtrct.prot_id;
+ params->es[idx].off = off;
+ params->es_cnt++;
+ off += ICE_FLOW_FV_EXTRACT_SZ;
+ }
+ }
+
+ return 0;
+}
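+
+/* Again assuming 2-byte extract words: a raw match of 3 bytes starting at
+ * packet byte 1 gets disp = 8 bits and spans
+ * DIV_ROUND_UP(8 + 3 * BITS_PER_BYTE, 16) = 2 word entries.
+ */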
+
+/**
+ * ice_flow_create_xtrct_seq - Create an extraction sequence for given segments
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ *
+ * This function iterates through all matched fields in the given segments, and
+ * creates an extraction sequence for the fields.
+ */
+static int
+ice_flow_create_xtrct_seq(struct ice_hw *hw,
+ struct ice_flow_prof_params *params)
+{
+ struct ice_flow_prof *prof = params->prof;
+ int status = 0;
+ u8 i;
+
+ for (i = 0; i < prof->segs_cnt; i++) {
+ u64 match = params->prof->segs[i].match;
+ enum ice_flow_field j;
+
+ for_each_set_bit(j, (unsigned long *)&match,
+ ICE_FLOW_FIELD_IDX_MAX) {
+ status = ice_flow_xtract_fld(hw, params, i, j, match);
+ if (status)
+ return status;
+ clear_bit(j, (unsigned long *)&match);
+ }
+
+ /* Process raw matching bytes */
+ status = ice_flow_xtract_raws(hw, params, i);
+ if (status)
+ return status;
+ }
+
+ return status;
+}
+
+/**
+ * ice_flow_proc_segs - process all packet segments associated with a profile
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ */
+static int
+ice_flow_proc_segs(struct ice_hw *hw, struct ice_flow_prof_params *params)
+{
+ int status;
+
+ status = ice_flow_proc_seg_hdrs(params);
+ if (status)
+ return status;
+
+ status = ice_flow_create_xtrct_seq(hw, params);
+ if (status)
+ return status;
+
+ switch (params->blk) {
+ case ICE_BLK_FD:
+ case ICE_BLK_RSS:
+ status = 0;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return status;
+}
+
+#define ICE_FLOW_FIND_PROF_CHK_FLDS 0x00000001
+#define ICE_FLOW_FIND_PROF_CHK_VSI 0x00000002
+#define ICE_FLOW_FIND_PROF_NOT_CHK_DIR 0x00000004
+
+/**
+ * ice_flow_find_prof_conds - Find a profile matching headers and conditions
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @dir: flow direction
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ * @vsi_handle: software VSI handle to check VSI (ICE_FLOW_FIND_PROF_CHK_VSI)
+ * @conds: additional conditions to be checked (ICE_FLOW_FIND_PROF_CHK_*)
+ */
+static struct ice_flow_prof *
+ice_flow_find_prof_conds(struct ice_hw *hw, enum ice_block blk,
+ enum ice_flow_dir dir, struct ice_flow_seg_info *segs,
+ u8 segs_cnt, u16 vsi_handle, u32 conds)
+{
+ struct ice_flow_prof *p, *prof = NULL;
+
+ mutex_lock(&hw->fl_profs_locks[blk]);
+ list_for_each_entry(p, &hw->fl_profs[blk], l_entry)
+ if ((p->dir == dir || conds & ICE_FLOW_FIND_PROF_NOT_CHK_DIR) &&
+ segs_cnt && segs_cnt == p->segs_cnt) {
+ u8 i;
+
+ /* Check for profile-VSI association if specified */
+ if ((conds & ICE_FLOW_FIND_PROF_CHK_VSI) &&
+ ice_is_vsi_valid(hw, vsi_handle) &&
+ !test_bit(vsi_handle, p->vsis))
+ continue;
+
+ /* Protocol headers must be checked. Matched fields are
+ * checked if specified.
+ */
+ for (i = 0; i < segs_cnt; i++)
+ if (segs[i].hdrs != p->segs[i].hdrs ||
+ ((conds & ICE_FLOW_FIND_PROF_CHK_FLDS) &&
+ segs[i].match != p->segs[i].match))
+ break;
+
+ /* A match is found if all segments are matched */
+ if (i == segs_cnt) {
+ prof = p;
+ break;
+ }
+ }
+ mutex_unlock(&hw->fl_profs_locks[blk]);
+
+ return prof;
+}
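+
+/* Flag semantics, as implemented above: ICE_FLOW_FIND_PROF_CHK_FLDS also
+ * requires identical match fields per segment, ICE_FLOW_FIND_PROF_CHK_VSI
+ * restricts the search to profiles already bound to @vsi_handle, and
+ * ICE_FLOW_FIND_PROF_NOT_CHK_DIR skips the flow direction comparison.
+ */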
+
+/**
+ * ice_flow_find_prof_id - Look up a profile with given profile ID
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @prof_id: unique ID to identify this flow profile
+ */
+static struct ice_flow_prof *
+ice_flow_find_prof_id(struct ice_hw *hw, enum ice_block blk, u64 prof_id)
+{
+ struct ice_flow_prof *p;
+
+ list_for_each_entry(p, &hw->fl_profs[blk], l_entry)
+ if (p->id == prof_id)
+ return p;
+
+ return NULL;
+}
+
+/**
+ * ice_dealloc_flow_entry - Deallocate flow entry memory
+ * @hw: pointer to the HW struct
+ * @entry: flow entry to be removed
+ */
+static void
+ice_dealloc_flow_entry(struct ice_hw *hw, struct ice_flow_entry *entry)
+{
+ if (!entry)
+ return;
+
+ if (entry->entry)
+ devm_kfree(ice_hw_to_dev(hw), entry->entry);
+
+ devm_kfree(ice_hw_to_dev(hw), entry);
+}
+
+/**
+ * ice_flow_rem_entry_sync - Remove a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @entry: flow entry to be removed
+ */
+static int
+ice_flow_rem_entry_sync(struct ice_hw *hw, enum ice_block __always_unused blk,
+ struct ice_flow_entry *entry)
+{
+ if (!entry)
+ return -EINVAL;
+
+ list_del(&entry->l_entry);
+
+ ice_dealloc_flow_entry(hw, entry);
+
+ return 0;
+}
+
+/**
+ * ice_flow_add_prof_sync - Add a flow profile for packet segments and fields
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @dir: flow direction
+ * @prof_id: unique ID to identify this flow profile
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ * @prof: stores the returned flow profile added
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ */
+static int
+ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk,
+ enum ice_flow_dir dir, u64 prof_id,
+ struct ice_flow_seg_info *segs, u8 segs_cnt,
+ struct ice_flow_prof **prof)
+{
+ struct ice_flow_prof_params *params;
+ int status;
+ u8 i;
+
+ if (!prof)
+ return -EINVAL;
+
+ params = kzalloc(sizeof(*params), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ params->prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*params->prof),
+ GFP_KERNEL);
+ if (!params->prof) {
+ status = -ENOMEM;
+ goto free_params;
+ }
+
+ /* initialize extraction sequence to all invalid (0xff) */
+ for (i = 0; i < ICE_MAX_FV_WORDS; i++) {
+ params->es[i].prot_id = ICE_PROT_INVALID;
+ params->es[i].off = ICE_FV_OFFSET_INVAL;
+ }
+
+ params->blk = blk;
+ params->prof->id = prof_id;
+ params->prof->dir = dir;
+ params->prof->segs_cnt = segs_cnt;
+
+ /* Make a copy of the segments that need to be persistent in the flow
+ * profile instance
+ */
+ for (i = 0; i < segs_cnt; i++)
+ memcpy(&params->prof->segs[i], &segs[i], sizeof(*segs));
+
+ status = ice_flow_proc_segs(hw, params);
+ if (status) {
+ ice_debug(hw, ICE_DBG_FLOW, "Error processing a flow's packet segments\n");
+ goto out;
+ }
+
+ /* Add a HW profile for this flow profile */
+ status = ice_add_prof(hw, blk, prof_id, (u8 *)params->ptypes,
+ params->attr, params->attr_cnt, params->es,
+ params->mask);
+ if (status) {
+ ice_debug(hw, ICE_DBG_FLOW, "Error adding a HW flow profile\n");
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&params->prof->entries);
+ mutex_init(&params->prof->entries_lock);
+ *prof = params->prof;
+
+out:
+ if (status)
+ devm_kfree(ice_hw_to_dev(hw), params->prof);
+free_params:
+ kfree(params);
+
+ return status;
+}
+
+/**
+ * ice_flow_rem_prof_sync - remove a flow profile
+ * @hw: pointer to the hardware structure
+ * @blk: classification stage
+ * @prof: pointer to flow profile to remove
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ */
+static int
+ice_flow_rem_prof_sync(struct ice_hw *hw, enum ice_block blk,
+ struct ice_flow_prof *prof)
+{
+ int status;
+
+ /* Remove all remaining flow entries before removing the flow profile */
+ if (!list_empty(&prof->entries)) {
+ struct ice_flow_entry *e, *t;
+
+ mutex_lock(&prof->entries_lock);
+
+ list_for_each_entry_safe(e, t, &prof->entries, l_entry) {
+ status = ice_flow_rem_entry_sync(hw, blk, e);
+ if (status)
+ break;
+ }
+
+ mutex_unlock(&prof->entries_lock);
+ }
+
+ /* Remove all hardware profiles associated with this flow profile */
+ status = ice_rem_prof(hw, blk, prof->id);
+ if (!status) {
+ list_del(&prof->l_entry);
+ mutex_destroy(&prof->entries_lock);
+ devm_kfree(ice_hw_to_dev(hw), prof);
+ }
+
+ return status;
+}
+
+/**
+ * ice_flow_assoc_prof - associate a VSI with a flow profile
+ * @hw: pointer to the hardware structure
+ * @blk: classification stage
+ * @prof: pointer to flow profile
+ * @vsi_handle: software VSI handle
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ * and the software VSI handle has been validated
+ */
+static int
+ice_flow_assoc_prof(struct ice_hw *hw, enum ice_block blk,
+ struct ice_flow_prof *prof, u16 vsi_handle)
+{
+ int status = 0;
+
+ if (!test_bit(vsi_handle, prof->vsis)) {
+ status = ice_add_prof_id_flow(hw, blk,
+ ice_get_hw_vsi_num(hw,
+ vsi_handle),
+ prof->id);
+ if (!status)
+ set_bit(vsi_handle, prof->vsis);
+ else
+ ice_debug(hw, ICE_DBG_FLOW, "HW profile add failed, %d\n",
+ status);
+ }
+
+ return status;
+}
+
+/**
+ * ice_flow_disassoc_prof - disassociate a VSI from a flow profile
+ * @hw: pointer to the hardware structure
+ * @blk: classification stage
+ * @prof: pointer to flow profile
+ * @vsi_handle: software VSI handle
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ * and the software VSI handle has been validated
+ */
+static int
+ice_flow_disassoc_prof(struct ice_hw *hw, enum ice_block blk,
+ struct ice_flow_prof *prof, u16 vsi_handle)
+{
+ int status = 0;
+
+ if (test_bit(vsi_handle, prof->vsis)) {
+ status = ice_rem_prof_id_flow(hw, blk,
+ ice_get_hw_vsi_num(hw,
+ vsi_handle),
+ prof->id);
+ if (!status)
+ clear_bit(vsi_handle, prof->vsis);
+ else
+ ice_debug(hw, ICE_DBG_FLOW, "HW profile remove failed, %d\n",
+ status);
+ }
+
+ return status;
+}
+
+/**
+ * ice_flow_add_prof - Add a flow profile for packet segments and matched fields
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @dir: flow direction
+ * @prof_id: unique ID to identify this flow profile
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ * @prof: stores the returned flow profile added
+ */
+int
+ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir,
+ u64 prof_id, struct ice_flow_seg_info *segs, u8 segs_cnt,
+ struct ice_flow_prof **prof)
+{
+ int status;
+
+ if (segs_cnt > ICE_FLOW_SEG_MAX)
+ return -ENOSPC;
+
+ if (!segs_cnt)
+ return -EINVAL;
+
+ if (!segs)
+ return -EINVAL;
+
+ status = ice_flow_val_hdrs(segs, segs_cnt);
+ if (status)
+ return status;
+
+ mutex_lock(&hw->fl_profs_locks[blk]);
+
+ status = ice_flow_add_prof_sync(hw, blk, dir, prof_id, segs, segs_cnt,
+ prof);
+ if (!status)
+ list_add(&(*prof)->l_entry, &hw->fl_profs[blk]);
+
+ mutex_unlock(&hw->fl_profs_locks[blk]);
+
+ return status;
+}
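+
+/* Illustrative usage sketch (not part of the upstream driver): create a
+ * single-segment TCP/IPv4 flow profile. The profile ID 0x1 and the function
+ * name are hypothetical.
+ */
+static int __maybe_unused
+ice_flow_example_add_tcp4_prof(struct ice_hw *hw, struct ice_flow_prof **prof)
+{
+	struct ice_flow_seg_info *seg;
+	int status;
+
+	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		return -ENOMEM;
+
+	/* the single segment carries IPv4 and TCP protocol headers */
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP);
+
+	status = ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, 0x1ULL,
+				   seg, 1, prof);
+	kfree(seg);
+	return status;
+}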
+
+/**
+ * ice_flow_rem_prof - Remove a flow profile and all entries associated with it
+ * @hw: pointer to the HW struct
+ * @blk: the block for which the flow profile is to be removed
+ * @prof_id: unique ID of the flow profile to be removed
+ */
+int ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id)
+{
+ struct ice_flow_prof *prof;
+ int status;
+
+ mutex_lock(&hw->fl_profs_locks[blk]);
+
+ prof = ice_flow_find_prof_id(hw, blk, prof_id);
+ if (!prof) {
+ status = -ENOENT;
+ goto out;
+ }
+
+ /* prof becomes invalid after the call */
+ status = ice_flow_rem_prof_sync(hw, blk, prof);
+
+out:
+ mutex_unlock(&hw->fl_profs_locks[blk]);
+
+ return status;
+}
+
+/**
+ * ice_flow_add_entry - Add a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @prof_id: ID of the profile to add a new flow entry to
+ * @entry_id: unique ID to identify this flow entry
+ * @vsi_handle: software VSI handle for the flow entry
+ * @prio: priority of the flow entry
+ * @data: pointer to a data buffer containing flow entry's match values/masks
+ * @entry_h: pointer to buffer that receives the new flow entry's handle
+ */
+int
+ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id,
+ u64 entry_id, u16 vsi_handle, enum ice_flow_priority prio,
+ void *data, u64 *entry_h)
+{
+ struct ice_flow_entry *e = NULL;
+ struct ice_flow_prof *prof;
+ int status;
+
+ /* No flow entry data is expected for RSS */
+ if (!entry_h || (!data && blk != ICE_BLK_RSS))
+ return -EINVAL;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ mutex_lock(&hw->fl_profs_locks[blk]);
+
+ prof = ice_flow_find_prof_id(hw, blk, prof_id);
+ if (!prof) {
+ status = -ENOENT;
+ } else {
+ /* Allocate memory for the entry being added and associate
+ * the VSI to the found flow profile
+ */
+ e = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*e), GFP_KERNEL);
+ if (!e)
+ status = -ENOMEM;
+ else
+ status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle);
+ }
+
+ mutex_unlock(&hw->fl_profs_locks[blk]);
+ if (status)
+ goto out;
+
+ e->id = entry_id;
+ e->vsi_handle = vsi_handle;
+ e->prof = prof;
+ e->priority = prio;
+
+ switch (blk) {
+ case ICE_BLK_FD:
+ case ICE_BLK_RSS:
+ break;
+ default:
+ status = -EOPNOTSUPP;
+ goto out;
+ }
+
+ mutex_lock(&prof->entries_lock);
+ list_add(&e->l_entry, &prof->entries);
+ mutex_unlock(&prof->entries_lock);
+
+ *entry_h = ICE_FLOW_ENTRY_HNDL(e);
+
+out:
+ if (status && e) {
+ if (e->entry)
+ devm_kfree(ice_hw_to_dev(hw), e->entry);
+ devm_kfree(ice_hw_to_dev(hw), e);
+ }
+
+ return status;
+}
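+
+/* Illustrative usage sketch (not part of the upstream driver): add one flow
+ * entry to a previously created profile. The profile ID 0x1, entry ID 0x10
+ * and 'match_buf' (a caller-defined input buffer) are hypothetical.
+ */
+static int __maybe_unused
+ice_flow_example_add_entry(struct ice_hw *hw, u16 vsi_handle, void *match_buf,
+			   u64 *entry_h)
+{
+	return ice_flow_add_entry(hw, ICE_BLK_FD, 0x1ULL, 0x10ULL, vsi_handle,
+				  ICE_FLOW_PRIO_NORMAL, match_buf, entry_h);
+}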
+
+/**
+ * ice_flow_rem_entry - Remove a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @entry_h: handle to the flow entry to be removed
+ */
+int ice_flow_rem_entry(struct ice_hw *hw, enum ice_block blk, u64 entry_h)
+{
+ struct ice_flow_entry *entry;
+ struct ice_flow_prof *prof;
+ int status = 0;
+
+ if (entry_h == ICE_FLOW_ENTRY_HANDLE_INVAL)
+ return -EINVAL;
+
+ entry = ICE_FLOW_ENTRY_PTR(entry_h);
+
+ /* Retain the pointer to the flow profile as the entry will be freed */
+ prof = entry->prof;
+
+ if (prof) {
+ mutex_lock(&prof->entries_lock);
+ status = ice_flow_rem_entry_sync(hw, blk, entry);
+ mutex_unlock(&prof->entries_lock);
+ }
+
+ return status;
+}
+
+/**
+ * ice_flow_set_fld_ext - specifies locations of field from entry's input buffer
+ * @seg: packet segment the field being set belongs to
+ * @fld: field to be set
+ * @field_type: type of the field
+ * @val_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of the value to match from
+ * entry's input buffer
+ * @mask_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of mask value from entry's
+ * input buffer
+ * @last_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of last/upper value from
+ * entry's input buffer
+ *
+ * This helper function stores information of a field being matched, including
+ * the type of the field and the locations of the value to match, the mask, and
+ * the upper-bound value in the start of the input buffer for a flow entry.
+ * This function should only be used for fixed-size data structures.
+ *
+ * This function also opportunistically determines the protocol headers to be
+ * present based on the fields being set. Some fields cannot be used alone to
+ * determine the protocol headers present. Sometimes, fields for particular
+ * protocol headers are not matched. In those cases, the protocol headers
+ * must be explicitly set.
+ */
+static void
+ice_flow_set_fld_ext(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
+ enum ice_flow_fld_match_type field_type, u16 val_loc,
+ u16 mask_loc, u16 last_loc)
+{
+ u64 bit = BIT_ULL(fld);
+
+ seg->match |= bit;
+ if (field_type == ICE_FLOW_FLD_TYPE_RANGE)
+ seg->range |= bit;
+
+ seg->fields[fld].type = field_type;
+ seg->fields[fld].src.val = val_loc;
+ seg->fields[fld].src.mask = mask_loc;
+ seg->fields[fld].src.last = last_loc;
+
+ ICE_FLOW_SET_HDRS(seg, ice_flds_info[fld].hdr);
+}
+
+/**
+ * ice_flow_set_fld - specifies locations of field from entry's input buffer
+ * @seg: packet segment the field being set belongs to
+ * @fld: field to be set
+ * @val_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of the value to match from
+ * entry's input buffer
+ * @mask_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of mask value from entry's
+ * input buffer
+ * @last_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of last/upper value from
+ * entry's input buffer
+ * @range: indicate if field being matched is to be in a range
+ *
+ * This function specifies the locations, in the form of byte offsets from the
+ * start of the input buffer for a flow entry, from where the value to match,
+ * the mask value, and upper value can be extracted. These locations are then
+ * stored in the flow profile. When adding a flow entry associated with the
+ * flow profile, these locations will be used to quickly extract the values and
+ * create the content of a match entry. This function should only be used for
+ * fixed-size data structures.
+ */
+void
+ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
+ u16 val_loc, u16 mask_loc, u16 last_loc, bool range)
+{
+ enum ice_flow_fld_match_type t = range ?
+ ICE_FLOW_FLD_TYPE_RANGE : ICE_FLOW_FLD_TYPE_REG;
+
+ ice_flow_set_fld_ext(seg, fld, t, val_loc, mask_loc, last_loc);
+}
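+
+/* Illustrative sketch (not part of the upstream driver): the val/mask
+ * locations passed to ice_flow_set_fld() are byte offsets into a
+ * caller-defined input buffer; the buffer layout below is hypothetical.
+ */
+struct ice_example_fdir_buf {
+	__be16 src_port;	/* value to match */
+	__be16 src_port_mask;	/* mask for the value */
+};
+
+static void __maybe_unused
+ice_flow_example_set_port_fld(struct ice_flow_seg_info *seg)
+{
+	ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_TCP_SRC_PORT,
+			 offsetof(struct ice_example_fdir_buf, src_port),
+			 offsetof(struct ice_example_fdir_buf, src_port_mask),
+			 ICE_FLOW_FLD_OFF_INVAL, false);
+}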
+
+/**
+ * ice_flow_add_fld_raw - sets locations of a raw field from entry's input buf
+ * @seg: packet segment the field being set belongs to
+ * @off: offset of the raw field from the beginning of the segment in bytes
+ * @len: length of the raw pattern to be matched
+ * @val_loc: location of the value to match from entry's input buffer
+ * @mask_loc: location of mask value from entry's input buffer
+ *
+ * This function specifies the offset of the raw field to be matched from the
+ * beginning of the specified packet segment, and the locations, in the form of
+ * byte offsets from the start of the input buffer for a flow entry, from where
+ * the value to match and the mask value can be extracted. These locations are
+ * then stored in the flow profile. When adding flow entries to the associated
+ * flow profile, these locations can be used to quickly extract the values to
+ * create the content of a match entry. This function should only be used for
+ * fixed-size data structures.
+ */
+void
+ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
+ u16 val_loc, u16 mask_loc)
+{
+ if (seg->raws_cnt < ICE_FLOW_SEG_RAW_FLD_MAX) {
+ seg->raws[seg->raws_cnt].off = off;
+ seg->raws[seg->raws_cnt].info.type = ICE_FLOW_FLD_TYPE_SIZE;
+ seg->raws[seg->raws_cnt].info.src.val = val_loc;
+ seg->raws[seg->raws_cnt].info.src.mask = mask_loc;
+ /* The "last" field is used to store the length of the field */
+ seg->raws[seg->raws_cnt].info.src.last = len;
+ }
+
+ /* Overflows of "raws" will be handled as an error condition later in
+ * the flow when this information is processed.
+ */
+ seg->raws_cnt++;
+}
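+
+/* Illustrative usage sketch (not part of the upstream driver): match a
+ * two-byte raw pattern at the start of the segment; the input-buffer
+ * offsets (value at 0, mask at 2) are hypothetical.
+ */
+static void __maybe_unused
+ice_flow_example_add_raw(struct ice_flow_seg_info *seg)
+{
+	ice_flow_add_fld_raw(seg, 0, 2, 0, 2);
+}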
+
+/**
+ * ice_flow_rem_vsi_prof - remove VSI from flow profile
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @prof_id: unique ID to identify this flow profile
+ *
+ * This function removes the flow entries associated with the input
+ * VSI handle and disassociates the VSI from the flow profile.
+ */
+int ice_flow_rem_vsi_prof(struct ice_hw *hw, u16 vsi_handle, u64 prof_id)
+{
+ struct ice_flow_prof *prof;
+ int status = 0;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ /* find flow profile pointer with input package block and profile ID */
+ prof = ice_flow_find_prof_id(hw, ICE_BLK_FD, prof_id);
+ if (!prof) {
+ ice_debug(hw, ICE_DBG_PKG, "Cannot find flow profile id=%llu\n",
+ prof_id);
+ return -ENOENT;
+ }
+
+ /* Remove all remaining flow entries before removing the flow profile */
+ if (!list_empty(&prof->entries)) {
+ struct ice_flow_entry *e, *t;
+
+ mutex_lock(&prof->entries_lock);
+ list_for_each_entry_safe(e, t, &prof->entries, l_entry) {
+ if (e->vsi_handle != vsi_handle)
+ continue;
+
+ status = ice_flow_rem_entry_sync(hw, ICE_BLK_FD, e);
+ if (status)
+ break;
+ }
+ mutex_unlock(&prof->entries_lock);
+ }
+ if (status)
+ return status;
+
+ /* disassociate the flow profile from sw VSI handle */
+ status = ice_flow_disassoc_prof(hw, ICE_BLK_FD, prof, vsi_handle);
+ if (status)
+ ice_debug(hw, ICE_DBG_PKG, "ice_flow_disassoc_prof() failed with status=%d\n",
+ status);
+ return status;
+}
+
+#define ICE_FLOW_RSS_SEG_HDR_L2_MASKS \
+ (ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN)
+
+#define ICE_FLOW_RSS_SEG_HDR_L3_MASKS \
+ (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)
+
+#define ICE_FLOW_RSS_SEG_HDR_L4_MASKS \
+ (ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
+
+#define ICE_FLOW_RSS_SEG_HDR_VAL_MASKS \
+ (ICE_FLOW_RSS_SEG_HDR_L2_MASKS | \
+ ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \
+ ICE_FLOW_RSS_SEG_HDR_L4_MASKS)
+
+/**
+ * ice_flow_set_rss_seg_info - setup packet segments for RSS
+ * @segs: pointer to the flow field segment(s)
+ * @hash_fields: fields to be hashed on for the segment(s)
+ * @flow_hdr: protocol header fields within a packet segment
+ *
+ * Helper function to extract fields from the hash bitmap and use the flow
+ * header value to set up a flow field segment for further use in flow
+ * profile entry addition or removal.
+ */
+static int
+ice_flow_set_rss_seg_info(struct ice_flow_seg_info *segs, u64 hash_fields,
+ u32 flow_hdr)
+{
+ u64 val;
+ u8 i;
+
+ for_each_set_bit(i, (unsigned long *)&hash_fields,
+ ICE_FLOW_FIELD_IDX_MAX)
+ ice_flow_set_fld(segs, (enum ice_flow_field)i,
+ ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, false);
+
+ ICE_FLOW_SET_HDRS(segs, flow_hdr);
+
+ if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS &
+ ~ICE_FLOW_RSS_HDRS_INNER_MASK & ~ICE_FLOW_SEG_HDR_IPV_OTHER)
+ return -EINVAL;
+
+ val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L3_MASKS);
+ if (val && !is_power_of_2(val))
+ return -EIO;
+
+ val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L4_MASKS);
+ if (val && !is_power_of_2(val))
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * ice_rem_vsi_rss_list - remove VSI from RSS list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ *
+ * Remove the VSI from all RSS configurations in the list.
+ */
+void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_rss_cfg *r, *tmp;
+
+ if (list_empty(&hw->rss_list_head))
+ return;
+
+ mutex_lock(&hw->rss_locks);
+ list_for_each_entry_safe(r, tmp, &hw->rss_list_head, l_entry)
+ if (test_and_clear_bit(vsi_handle, r->vsis))
+ if (bitmap_empty(r->vsis, ICE_MAX_VSI)) {
+ list_del(&r->l_entry);
+ devm_kfree(ice_hw_to_dev(hw), r);
+ }
+ mutex_unlock(&hw->rss_locks);
+}
+
+/**
+ * ice_rem_vsi_rss_cfg - remove RSS configurations associated with VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function will iterate through all flow profiles and disassociate
+ * the VSI from that profile. If the flow profile has no VSIs it will
+ * be removed.
+ */
+int ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle)
+{
+ const enum ice_block blk = ICE_BLK_RSS;
+ struct ice_flow_prof *p, *t;
+ int status = 0;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ if (list_empty(&hw->fl_profs[blk]))
+ return 0;
+
+ mutex_lock(&hw->rss_locks);
+ list_for_each_entry_safe(p, t, &hw->fl_profs[blk], l_entry)
+ if (test_bit(vsi_handle, p->vsis)) {
+ status = ice_flow_disassoc_prof(hw, blk, p, vsi_handle);
+ if (status)
+ break;
+
+ if (bitmap_empty(p->vsis, ICE_MAX_VSI)) {
+ status = ice_flow_rem_prof(hw, blk, p->id);
+ if (status)
+ break;
+ }
+ }
+ mutex_unlock(&hw->rss_locks);
+
+ return status;
+}
+
+/**
+ * ice_rem_rss_list - remove RSS configuration from list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @prof: pointer to flow profile
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static void
+ice_rem_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof)
+{
+ struct ice_rss_cfg *r, *tmp;
+
+	/* Search for RSS hash fields associated with the VSI that match the
+	 * hash configurations associated with the flow profile. If found,
+	 * remove it from the RSS entry list of the VSI context and delete it.
+ */
+ list_for_each_entry_safe(r, tmp, &hw->rss_list_head, l_entry)
+ if (r->hashed_flds == prof->segs[prof->segs_cnt - 1].match &&
+ r->packet_hdr == prof->segs[prof->segs_cnt - 1].hdrs) {
+ clear_bit(vsi_handle, r->vsis);
+ if (bitmap_empty(r->vsis, ICE_MAX_VSI)) {
+ list_del(&r->l_entry);
+ devm_kfree(ice_hw_to_dev(hw), r);
+ }
+ return;
+ }
+}
+
+/**
+ * ice_add_rss_list - add RSS configuration to list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @prof: pointer to flow profile
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static int
+ice_add_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof)
+{
+ struct ice_rss_cfg *r, *rss_cfg;
+
+ list_for_each_entry(r, &hw->rss_list_head, l_entry)
+ if (r->hashed_flds == prof->segs[prof->segs_cnt - 1].match &&
+ r->packet_hdr == prof->segs[prof->segs_cnt - 1].hdrs) {
+ set_bit(vsi_handle, r->vsis);
+ return 0;
+ }
+
+ rss_cfg = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rss_cfg),
+ GFP_KERNEL);
+ if (!rss_cfg)
+ return -ENOMEM;
+
+ rss_cfg->hashed_flds = prof->segs[prof->segs_cnt - 1].match;
+ rss_cfg->packet_hdr = prof->segs[prof->segs_cnt - 1].hdrs;
+ set_bit(vsi_handle, rss_cfg->vsis);
+
+ list_add_tail(&rss_cfg->l_entry, &hw->rss_list_head);
+
+ return 0;
+}
+
+#define ICE_FLOW_PROF_HASH_S 0
+#define ICE_FLOW_PROF_HASH_M (0xFFFFFFFFULL << ICE_FLOW_PROF_HASH_S)
+#define ICE_FLOW_PROF_HDR_S 32
+#define ICE_FLOW_PROF_HDR_M (0x3FFFFFFFULL << ICE_FLOW_PROF_HDR_S)
+#define ICE_FLOW_PROF_ENCAP_S 63
+#define ICE_FLOW_PROF_ENCAP_M (BIT_ULL(ICE_FLOW_PROF_ENCAP_S))
+
+#define ICE_RSS_OUTER_HEADERS 1
+#define ICE_RSS_INNER_HEADERS 2
+
+/* Flow profile ID format:
+ * [0:31] - Packet match fields
+ * [32:62] - Protocol header
+ * [63] - Encapsulation flag, 0 if non-tunneled, 1 if tunneled
+ */
+#define ICE_FLOW_GEN_PROFID(hash, hdr, segs_cnt) \
+ ((u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \
+ (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \
+ ((u8)((segs_cnt) - 1) ? ICE_FLOW_PROF_ENCAP_M : 0)))
+
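+/* Worked example (illustrative): for a single-segment TCP/IPv4 RSS profile,
+ * hash = ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_TCP_PORT and
+ * hdr = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP; then
+ * ICE_FLOW_GEN_PROFID(hash, hdr, 1) packs the low 32 hash bits into [0:31],
+ * the header bitmap into [32:62], and leaves bit 63 clear because
+ * segs_cnt - 1 == 0 (non-tunneled).
+ */
+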
+/**
+ * ice_add_rss_cfg_sync - add an RSS configuration
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: hash bit fields (ICE_FLOW_HASH_*) to configure
+ * @addl_hdrs: protocol header fields
+ * @segs_cnt: packet segment count
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static int
+ice_add_rss_cfg_sync(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+ u32 addl_hdrs, u8 segs_cnt)
+{
+ const enum ice_block blk = ICE_BLK_RSS;
+ struct ice_flow_prof *prof = NULL;
+ struct ice_flow_seg_info *segs;
+ int status;
+
+ if (!segs_cnt || segs_cnt > ICE_FLOW_SEG_MAX)
+ return -EINVAL;
+
+ segs = kcalloc(segs_cnt, sizeof(*segs), GFP_KERNEL);
+ if (!segs)
+ return -ENOMEM;
+
+ /* Construct the packet segment info from the hashed fields */
+ status = ice_flow_set_rss_seg_info(&segs[segs_cnt - 1], hashed_flds,
+ addl_hdrs);
+ if (status)
+ goto exit;
+
+	/* Search for a flow profile that has matching headers and hash
+	 * fields, and has the input VSI associated with it. If found, no
+	 * further operations are required, so exit.
+	 */
+ prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+ vsi_handle,
+ ICE_FLOW_FIND_PROF_CHK_FLDS |
+ ICE_FLOW_FIND_PROF_CHK_VSI);
+ if (prof)
+ goto exit;
+
+	/* Check if a flow profile exists with the same protocol headers and
+	 * is associated with the input VSI. If so, disassociate the VSI from
+ * this profile. The VSI will be added to a new profile created with
+ * the protocol header and new hash field configuration.
+ */
+ prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+ vsi_handle, ICE_FLOW_FIND_PROF_CHK_VSI);
+ if (prof) {
+ status = ice_flow_disassoc_prof(hw, blk, prof, vsi_handle);
+ if (!status)
+ ice_rem_rss_list(hw, vsi_handle, prof);
+ else
+ goto exit;
+
+ /* Remove profile if it has no VSIs associated */
+ if (bitmap_empty(prof->vsis, ICE_MAX_VSI)) {
+ status = ice_flow_rem_prof(hw, blk, prof->id);
+ if (status)
+ goto exit;
+ }
+ }
+
+	/* Search for a profile that has the same match fields only. If one
+	 * exists, then associate the VSI with this profile.
+ */
+ prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+ vsi_handle,
+ ICE_FLOW_FIND_PROF_CHK_FLDS);
+ if (prof) {
+ status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle);
+ if (!status)
+ status = ice_add_rss_list(hw, vsi_handle, prof);
+ goto exit;
+ }
+
+	/* Create a new flow profile with the generated profile ID and packet
+	 * segment information.
+ */
+ status = ice_flow_add_prof(hw, blk, ICE_FLOW_RX,
+ ICE_FLOW_GEN_PROFID(hashed_flds,
+ segs[segs_cnt - 1].hdrs,
+ segs_cnt),
+ segs, segs_cnt, &prof);
+ if (status)
+ goto exit;
+
+ status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle);
+	/* If association with the new flow profile failed, then this profile
+	 * can be removed.
+ */
+ if (status) {
+ ice_flow_rem_prof(hw, blk, prof->id);
+ goto exit;
+ }
+
+ status = ice_add_rss_list(hw, vsi_handle, prof);
+
+exit:
+ kfree(segs);
+ return status;
+}
+
+/**
+ * ice_add_rss_cfg - add an RSS configuration with specified hashed fields
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: hash bit fields (ICE_FLOW_HASH_*) to configure
+ * @addl_hdrs: protocol header fields
+ *
+ * This function will generate a flow profile based on the input fields to
+ * hash on and the flow type, and will use the VSI number to add a flow entry
+ * to the profile.
+ */
+int
+ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+ u32 addl_hdrs)
+{
+ int status;
+
+ if (hashed_flds == ICE_HASH_INVALID ||
+ !ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ mutex_lock(&hw->rss_locks);
+ status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs,
+ ICE_RSS_OUTER_HEADERS);
+ if (!status)
+ status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds,
+ addl_hdrs, ICE_RSS_INNER_HEADERS);
+ mutex_unlock(&hw->rss_locks);
+
+ return status;
+}
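+
+/* Illustrative usage sketch (not part of the upstream driver): enable
+ * 4-tuple RSS for TCP over IPv4 on a VSI; assumes 'hw' and 'vsi_handle'
+ * are valid. The function name is hypothetical.
+ */
+static int __maybe_unused
+ice_example_enable_tcp4_rss(struct ice_hw *hw, u16 vsi_handle)
+{
+	return ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV4,
+			       ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP);
+}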
+
+/**
+ * ice_rem_rss_cfg_sync - remove an existing RSS configuration
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: Packet hash types (ICE_FLOW_HASH_*) to remove
+ * @addl_hdrs: Protocol header fields within a packet segment
+ * @segs_cnt: packet segment count
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static int
+ice_rem_rss_cfg_sync(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+ u32 addl_hdrs, u8 segs_cnt)
+{
+ const enum ice_block blk = ICE_BLK_RSS;
+ struct ice_flow_seg_info *segs;
+ struct ice_flow_prof *prof;
+ int status;
+
+ segs = kcalloc(segs_cnt, sizeof(*segs), GFP_KERNEL);
+ if (!segs)
+ return -ENOMEM;
+
+ /* Construct the packet segment info from the hashed fields */
+ status = ice_flow_set_rss_seg_info(&segs[segs_cnt - 1], hashed_flds,
+ addl_hdrs);
+ if (status)
+ goto out;
+
+ prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+ vsi_handle,
+ ICE_FLOW_FIND_PROF_CHK_FLDS);
+ if (!prof) {
+ status = -ENOENT;
+ goto out;
+ }
+
+ status = ice_flow_disassoc_prof(hw, blk, prof, vsi_handle);
+ if (status)
+ goto out;
+
+ /* Remove RSS configuration from VSI context before deleting
+ * the flow profile.
+ */
+ ice_rem_rss_list(hw, vsi_handle, prof);
+
+ if (bitmap_empty(prof->vsis, ICE_MAX_VSI))
+ status = ice_flow_rem_prof(hw, blk, prof->id);
+
+out:
+ kfree(segs);
+ return status;
+}
+
+/**
+ * ice_rem_rss_cfg - remove an existing RSS config with matching hashed fields
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: Packet hash types (ICE_FLOW_HASH_*) to remove
+ * @addl_hdrs: Protocol header fields within a packet segment
+ *
+ * This function will look up the flow profile based on the input
+ * hash field bitmap, iterate through the profile entry list of
+ * that profile, and find the entry associated with the input VSI to
+ * be removed. Calls are made to the underlying flow APIs which will
+ * in turn build or update buffers for the RSS XLT1 section.
+ */
+int __maybe_unused
+ice_rem_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+ u32 addl_hdrs)
+{
+ int status;
+
+ if (hashed_flds == ICE_HASH_INVALID ||
+ !ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ mutex_lock(&hw->rss_locks);
+ status = ice_rem_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs,
+ ICE_RSS_OUTER_HEADERS);
+ if (!status)
+ status = ice_rem_rss_cfg_sync(hw, vsi_handle, hashed_flds,
+ addl_hdrs, ICE_RSS_INNER_HEADERS);
+ mutex_unlock(&hw->rss_locks);
+
+ return status;
+}
+
+/* Mapping of AVF hash bit fields to an L3-L4 hash combination.
+ * As the ice_flow_avf_hdr_field values represent individual bit shifts in a
+ * hash, convert them to their appropriate flow L3/L4 values.
+ */
+ */
+#define ICE_FLOW_AVF_RSS_IPV4_MASKS \
+ (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_OTHER) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV4))
+#define ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS \
+ (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP))
+#define ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS \
+ (BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_UDP))
+#define ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS \
+ (ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS | ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS | \
+ ICE_FLOW_AVF_RSS_IPV4_MASKS | BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP))
+
+#define ICE_FLOW_AVF_RSS_IPV6_MASKS \
+ (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_OTHER) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV6))
+#define ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS \
+ (BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_UDP))
+#define ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS \
+ (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP))
+#define ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS \
+ (ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS | ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS | \
+ ICE_FLOW_AVF_RSS_IPV6_MASKS | BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP))
+
+/**
+ * ice_add_avf_rss_cfg - add an RSS configuration for AVF driver
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @avf_hash: hash bit fields (ICE_AVF_FLOW_FIELD_*) to configure
+ *
+ * This function will take the hash bitmap provided by the AVF driver via a
+ * message, convert it to ICE-compatible values, and configure RSS flow
+ * profiles.
+ */
+int ice_add_avf_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 avf_hash)
+{
+ int status = 0;
+ u64 hash_flds;
+
+ if (avf_hash == ICE_AVF_FLOW_FIELD_INVALID ||
+ !ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ /* Make sure no unsupported bits are specified */
+ if (avf_hash & ~(ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS |
+ ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS))
+ return -EIO;
+
+ hash_flds = avf_hash;
+
+ /* Always create an L3 RSS configuration for any L4 RSS configuration */
+ if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS)
+ hash_flds |= ICE_FLOW_AVF_RSS_IPV4_MASKS;
+
+ if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS)
+ hash_flds |= ICE_FLOW_AVF_RSS_IPV6_MASKS;
+
+ /* Create the corresponding RSS configuration for each valid hash bit */
+ while (hash_flds) {
+ u64 rss_hash = ICE_HASH_INVALID;
+
+ if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS) {
+ if (hash_flds & ICE_FLOW_AVF_RSS_IPV4_MASKS) {
+ rss_hash = ICE_FLOW_HASH_IPV4;
+ hash_flds &= ~ICE_FLOW_AVF_RSS_IPV4_MASKS;
+ } else if (hash_flds &
+ ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS) {
+ rss_hash = ICE_FLOW_HASH_IPV4 |
+ ICE_FLOW_HASH_TCP_PORT;
+ hash_flds &= ~ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS;
+ } else if (hash_flds &
+ ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS) {
+ rss_hash = ICE_FLOW_HASH_IPV4 |
+ ICE_FLOW_HASH_UDP_PORT;
+ hash_flds &= ~ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS;
+ } else if (hash_flds &
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP)) {
+ rss_hash = ICE_FLOW_HASH_IPV4 |
+ ICE_FLOW_HASH_SCTP_PORT;
+ hash_flds &=
+ ~BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP);
+ }
+ } else if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS) {
+ if (hash_flds & ICE_FLOW_AVF_RSS_IPV6_MASKS) {
+ rss_hash = ICE_FLOW_HASH_IPV6;
+ hash_flds &= ~ICE_FLOW_AVF_RSS_IPV6_MASKS;
+ } else if (hash_flds &
+ ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS) {
+ rss_hash = ICE_FLOW_HASH_IPV6 |
+ ICE_FLOW_HASH_TCP_PORT;
+ hash_flds &= ~ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS;
+ } else if (hash_flds &
+ ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS) {
+ rss_hash = ICE_FLOW_HASH_IPV6 |
+ ICE_FLOW_HASH_UDP_PORT;
+ hash_flds &= ~ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS;
+ } else if (hash_flds &
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP)) {
+ rss_hash = ICE_FLOW_HASH_IPV6 |
+ ICE_FLOW_HASH_SCTP_PORT;
+ hash_flds &=
+ ~BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP);
+ }
+ }
+
+ if (rss_hash == ICE_HASH_INVALID)
+ return -EIO;
+
+ status = ice_add_rss_cfg(hw, vsi_handle, rss_hash,
+ ICE_FLOW_SEG_HDR_NONE);
+ if (status)
+ break;
+ }
+
+ return status;
+}
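+
+/* Worked example (illustrative): if an AVF requests only
+ * BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP), the code above first ORs in the L3
+ * ICE_FLOW_AVF_RSS_IPV4_MASKS bits, consumes those as ICE_FLOW_HASH_IPV4 on
+ * the first loop iteration, and converts the TCP bit to
+ * ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_TCP_PORT on the second.
+ */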
+
+/**
+ * ice_replay_rss_cfg - replay RSS configurations associated with VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ */
+int ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_rss_cfg *r;
+ int status = 0;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ mutex_lock(&hw->rss_locks);
+ list_for_each_entry(r, &hw->rss_list_head, l_entry) {
+ if (test_bit(vsi_handle, r->vsis)) {
+ status = ice_add_rss_cfg_sync(hw, vsi_handle,
+ r->hashed_flds,
+ r->packet_hdr,
+ ICE_RSS_OUTER_HEADERS);
+ if (status)
+ break;
+ status = ice_add_rss_cfg_sync(hw, vsi_handle,
+ r->hashed_flds,
+ r->packet_hdr,
+ ICE_RSS_INNER_HEADERS);
+ if (status)
+ break;
+ }
+ }
+ mutex_unlock(&hw->rss_locks);
+
+ return status;
+}
+
+/**
+ * ice_get_rss_cfg - returns hashed fields for the given header types
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hdrs: protocol header type
+ *
+ * This function will return the match fields of the first instance of a flow
+ * profile that has the given header types and contains the input VSI.
+ */
+u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs)
+{
+ u64 rss_hash = ICE_HASH_INVALID;
+ struct ice_rss_cfg *r;
+
+	/* verify that the protocol header is non-zero and the VSI is valid */
+ if (hdrs == ICE_FLOW_SEG_HDR_NONE || !ice_is_vsi_valid(hw, vsi_handle))
+ return ICE_HASH_INVALID;
+
+ mutex_lock(&hw->rss_locks);
+ list_for_each_entry(r, &hw->rss_list_head, l_entry)
+ if (test_bit(vsi_handle, r->vsis) &&
+ r->packet_hdr == hdrs) {
+ rss_hash = r->hashed_flds;
+ break;
+ }
+ mutex_unlock(&hw->rss_locks);
+
+ return rss_hash;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
new file mode 100644
index 000000000..b465d27d9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLOW_H_
+#define _ICE_FLOW_H_
+
+#include "ice_flex_type.h"
+
+#define ICE_FLOW_ENTRY_HANDLE_INVAL 0
+#define ICE_FLOW_FLD_OFF_INVAL 0xffff
+
+/* Generate flow hash field from flow field type(s) */
+#define ICE_FLOW_HASH_ETH \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA) | \
+ BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA))
+#define ICE_FLOW_HASH_IPV4 \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | \
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA))
+#define ICE_FLOW_HASH_IPV6 \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | \
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA))
+#define ICE_FLOW_HASH_TCP_PORT \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) | \
+ BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT))
+#define ICE_FLOW_HASH_UDP_PORT \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT) | \
+ BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT))
+#define ICE_FLOW_HASH_SCTP_PORT \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT) | \
+ BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT))
+
+#define ICE_HASH_INVALID 0
+#define ICE_HASH_TCP_IPV4 (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_TCP_PORT)
+#define ICE_HASH_TCP_IPV6 (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_TCP_PORT)
+#define ICE_HASH_UDP_IPV4 (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_UDP_PORT)
+#define ICE_HASH_UDP_IPV6 (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_UDP_PORT)
+
+#define ICE_FLOW_HASH_GTP_TEID \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID))
+
+#define ICE_FLOW_HASH_GTP_IPV4_TEID \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_TEID)
+#define ICE_FLOW_HASH_GTP_IPV6_TEID \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_TEID)
+
+#define ICE_FLOW_HASH_GTP_U_TEID \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID))
+
+#define ICE_FLOW_HASH_GTP_U_IPV4_TEID \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_TEID)
+#define ICE_FLOW_HASH_GTP_U_IPV6_TEID \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_TEID)
+
+#define ICE_FLOW_HASH_GTP_U_EH_TEID \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_TEID))
+
+#define ICE_FLOW_HASH_GTP_U_EH_QFI \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_QFI))
+
+#define ICE_FLOW_HASH_GTP_U_IPV4_EH \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_EH_TEID | \
+ ICE_FLOW_HASH_GTP_U_EH_QFI)
+#define ICE_FLOW_HASH_GTP_U_IPV6_EH \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_EH_TEID | \
+ ICE_FLOW_HASH_GTP_U_EH_QFI)
+
+#define ICE_FLOW_HASH_PPPOE_SESS_ID \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID))
+
+#define ICE_FLOW_HASH_PPPOE_SESS_ID_ETH \
+ (ICE_FLOW_HASH_ETH | ICE_FLOW_HASH_PPPOE_SESS_ID)
+#define ICE_FLOW_HASH_PPPOE_TCP_ID \
+ (ICE_FLOW_HASH_TCP_PORT | ICE_FLOW_HASH_PPPOE_SESS_ID)
+#define ICE_FLOW_HASH_PPPOE_UDP_ID \
+ (ICE_FLOW_HASH_UDP_PORT | ICE_FLOW_HASH_PPPOE_SESS_ID)
+
+#define ICE_FLOW_HASH_PFCP_SEID \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID))
+#define ICE_FLOW_HASH_PFCP_IPV4_SEID \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_PFCP_SEID)
+#define ICE_FLOW_HASH_PFCP_IPV6_SEID \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_PFCP_SEID)
+
+#define ICE_FLOW_HASH_L2TPV3_SESS_ID \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID))
+#define ICE_FLOW_HASH_L2TPV3_IPV4_SESS_ID \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_L2TPV3_SESS_ID)
+#define ICE_FLOW_HASH_L2TPV3_IPV6_SESS_ID \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_L2TPV3_SESS_ID)
+
+#define ICE_FLOW_HASH_ESP_SPI \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI))
+#define ICE_FLOW_HASH_ESP_IPV4_SPI \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_ESP_SPI)
+#define ICE_FLOW_HASH_ESP_IPV6_SPI \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_ESP_SPI)
+
+#define ICE_FLOW_HASH_AH_SPI \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI))
+#define ICE_FLOW_HASH_AH_IPV4_SPI \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_AH_SPI)
+#define ICE_FLOW_HASH_AH_IPV6_SPI \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_AH_SPI)
+
+#define ICE_FLOW_HASH_NAT_T_ESP_SPI \
+ (BIT_ULL(ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI))
+#define ICE_FLOW_HASH_NAT_T_ESP_IPV4_SPI \
+ (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_NAT_T_ESP_SPI)
+#define ICE_FLOW_HASH_NAT_T_ESP_IPV6_SPI \
+ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_NAT_T_ESP_SPI)
+
+/* Protocol header fields within a packet segment. A segment consists of one or
+ * more protocol headers that make up a logical group of protocol headers. Each
+ * logical group of protocol headers encapsulates or is encapsulated using/by
+ * tunneling or encapsulation protocols for network virtualization such as GRE,
+ * VxLAN, etc.
+ */
+enum ice_flow_seg_hdr {
+ ICE_FLOW_SEG_HDR_NONE = 0x00000000,
+ ICE_FLOW_SEG_HDR_ETH = 0x00000001,
+ ICE_FLOW_SEG_HDR_VLAN = 0x00000002,
+ ICE_FLOW_SEG_HDR_IPV4 = 0x00000004,
+ ICE_FLOW_SEG_HDR_IPV6 = 0x00000008,
+ ICE_FLOW_SEG_HDR_ARP = 0x00000010,
+ ICE_FLOW_SEG_HDR_ICMP = 0x00000020,
+ ICE_FLOW_SEG_HDR_TCP = 0x00000040,
+ ICE_FLOW_SEG_HDR_UDP = 0x00000080,
+ ICE_FLOW_SEG_HDR_SCTP = 0x00000100,
+ ICE_FLOW_SEG_HDR_GRE = 0x00000200,
+ ICE_FLOW_SEG_HDR_GTPC = 0x00000400,
+ ICE_FLOW_SEG_HDR_GTPC_TEID = 0x00000800,
+ ICE_FLOW_SEG_HDR_GTPU_IP = 0x00001000,
+ ICE_FLOW_SEG_HDR_GTPU_EH = 0x00002000,
+ ICE_FLOW_SEG_HDR_GTPU_DWN = 0x00004000,
+ ICE_FLOW_SEG_HDR_GTPU_UP = 0x00008000,
+ ICE_FLOW_SEG_HDR_PPPOE = 0x00010000,
+ ICE_FLOW_SEG_HDR_PFCP_NODE = 0x00020000,
+ ICE_FLOW_SEG_HDR_PFCP_SESSION = 0x00040000,
+ ICE_FLOW_SEG_HDR_L2TPV3 = 0x00080000,
+ ICE_FLOW_SEG_HDR_ESP = 0x00100000,
+ ICE_FLOW_SEG_HDR_AH = 0x00200000,
+ ICE_FLOW_SEG_HDR_NAT_T_ESP = 0x00400000,
+ ICE_FLOW_SEG_HDR_ETH_NON_IP = 0x00800000,
+ /* The following is an additive bit for ICE_FLOW_SEG_HDR_IPV4 and
+ * ICE_FLOW_SEG_HDR_IPV6 which include the IPV4 other PTYPEs
+ */
+ ICE_FLOW_SEG_HDR_IPV_OTHER = 0x20000000,
+};
+
+/* These segments all have the same PTYPES, but are otherwise distinguished by
+ * the value of the gtp_eh_pdu and gtp_eh_pdu_link flags:
+ *
+ * gtp_eh_pdu gtp_eh_pdu_link
+ * ICE_FLOW_SEG_HDR_GTPU_IP 0 0
+ * ICE_FLOW_SEG_HDR_GTPU_EH 1 don't care
+ * ICE_FLOW_SEG_HDR_GTPU_DWN 1 0
+ * ICE_FLOW_SEG_HDR_GTPU_UP 1 1
+ */
+#define ICE_FLOW_SEG_HDR_GTPU (ICE_FLOW_SEG_HDR_GTPU_IP | \
+ ICE_FLOW_SEG_HDR_GTPU_EH | \
+ ICE_FLOW_SEG_HDR_GTPU_DWN | \
+ ICE_FLOW_SEG_HDR_GTPU_UP)
+#define ICE_FLOW_SEG_HDR_PFCP (ICE_FLOW_SEG_HDR_PFCP_NODE | \
+ ICE_FLOW_SEG_HDR_PFCP_SESSION)
+
+enum ice_flow_field {
+ /* L2 */
+ ICE_FLOW_FIELD_IDX_ETH_DA,
+ ICE_FLOW_FIELD_IDX_ETH_SA,
+ ICE_FLOW_FIELD_IDX_S_VLAN,
+ ICE_FLOW_FIELD_IDX_C_VLAN,
+ ICE_FLOW_FIELD_IDX_ETH_TYPE,
+ /* L3 */
+ ICE_FLOW_FIELD_IDX_IPV4_DSCP,
+ ICE_FLOW_FIELD_IDX_IPV6_DSCP,
+ ICE_FLOW_FIELD_IDX_IPV4_TTL,
+ ICE_FLOW_FIELD_IDX_IPV4_PROT,
+ ICE_FLOW_FIELD_IDX_IPV6_TTL,
+ ICE_FLOW_FIELD_IDX_IPV6_PROT,
+ ICE_FLOW_FIELD_IDX_IPV4_SA,
+ ICE_FLOW_FIELD_IDX_IPV4_DA,
+ ICE_FLOW_FIELD_IDX_IPV6_SA,
+ ICE_FLOW_FIELD_IDX_IPV6_DA,
+ /* L4 */
+ ICE_FLOW_FIELD_IDX_TCP_SRC_PORT,
+ ICE_FLOW_FIELD_IDX_TCP_DST_PORT,
+ ICE_FLOW_FIELD_IDX_UDP_SRC_PORT,
+ ICE_FLOW_FIELD_IDX_UDP_DST_PORT,
+ ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT,
+ ICE_FLOW_FIELD_IDX_SCTP_DST_PORT,
+ ICE_FLOW_FIELD_IDX_TCP_FLAGS,
+ /* ARP */
+ ICE_FLOW_FIELD_IDX_ARP_SIP,
+ ICE_FLOW_FIELD_IDX_ARP_DIP,
+ ICE_FLOW_FIELD_IDX_ARP_SHA,
+ ICE_FLOW_FIELD_IDX_ARP_DHA,
+ ICE_FLOW_FIELD_IDX_ARP_OP,
+ /* ICMP */
+ ICE_FLOW_FIELD_IDX_ICMP_TYPE,
+ ICE_FLOW_FIELD_IDX_ICMP_CODE,
+ /* GRE */
+ ICE_FLOW_FIELD_IDX_GRE_KEYID,
+ /* GTPC_TEID */
+ ICE_FLOW_FIELD_IDX_GTPC_TEID,
+ /* GTPU_IP */
+ ICE_FLOW_FIELD_IDX_GTPU_IP_TEID,
+ /* GTPU_EH */
+ ICE_FLOW_FIELD_IDX_GTPU_EH_TEID,
+ ICE_FLOW_FIELD_IDX_GTPU_EH_QFI,
+ /* GTPU_UP */
+ ICE_FLOW_FIELD_IDX_GTPU_UP_TEID,
+ /* GTPU_DWN */
+ ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID,
+ /* PPPoE */
+ ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID,
+ /* PFCP */
+ ICE_FLOW_FIELD_IDX_PFCP_SEID,
+ /* L2TPv3 */
+ ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID,
+ /* ESP */
+ ICE_FLOW_FIELD_IDX_ESP_SPI,
+ /* AH */
+ ICE_FLOW_FIELD_IDX_AH_SPI,
+ /* NAT_T ESP */
+ ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI,
+ /* The total number of enums must not exceed 64 */
+ ICE_FLOW_FIELD_IDX_MAX
+};
+
+/* Flow headers and fields for AVF support */
+enum ice_flow_avf_hdr_field {
+ /* Values 0 - 28 are reserved for future use */
+ ICE_AVF_FLOW_FIELD_INVALID = 0,
+ ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP = 29,
+ ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP,
+ ICE_AVF_FLOW_FIELD_IPV4_UDP,
+ ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK,
+ ICE_AVF_FLOW_FIELD_IPV4_TCP,
+ ICE_AVF_FLOW_FIELD_IPV4_SCTP,
+ ICE_AVF_FLOW_FIELD_IPV4_OTHER,
+ ICE_AVF_FLOW_FIELD_FRAG_IPV4,
+ /* Values 37-38 are reserved */
+ ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP = 39,
+ ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP,
+ ICE_AVF_FLOW_FIELD_IPV6_UDP,
+ ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK,
+ ICE_AVF_FLOW_FIELD_IPV6_TCP,
+ ICE_AVF_FLOW_FIELD_IPV6_SCTP,
+ ICE_AVF_FLOW_FIELD_IPV6_OTHER,
+ ICE_AVF_FLOW_FIELD_FRAG_IPV6,
+ ICE_AVF_FLOW_FIELD_RSVD47,
+ ICE_AVF_FLOW_FIELD_FCOE_OX,
+ ICE_AVF_FLOW_FIELD_FCOE_RX,
+ ICE_AVF_FLOW_FIELD_FCOE_OTHER,
+ /* Values 51-62 are reserved */
+ ICE_AVF_FLOW_FIELD_L2_PAYLOAD = 63,
+ ICE_AVF_FLOW_FIELD_MAX
+};
+
+/* Supported RSS offloads. This macro is defined to support the
+ * VIRTCHNL_OP_GET_RSS_HENA_CAPS op. The PF driver sends the RSS hardware
+ * capabilities to the caller of this op.
+ */
+#define ICE_DEFAULT_RSS_HENA ( \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_UDP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_OTHER) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV4) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_UDP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_OTHER) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV6) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP) | \
+ BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP))
+
+enum ice_flow_dir {
+ ICE_FLOW_RX = 0x02,
+};
+
+enum ice_flow_priority {
+ ICE_FLOW_PRIO_LOW,
+ ICE_FLOW_PRIO_NORMAL,
+ ICE_FLOW_PRIO_HIGH
+};
+
+#define ICE_FLOW_SEG_MAX 2
+#define ICE_FLOW_SEG_RAW_FLD_MAX 2
+#define ICE_FLOW_FV_EXTRACT_SZ 2
+
+#define ICE_FLOW_SET_HDRS(seg, val) ((seg)->hdrs |= (u32)(val))
+
+struct ice_flow_seg_xtrct {
+ u8 prot_id; /* Protocol ID of extracted header field */
+ u16 off; /* Starting offset of the field in header in bytes */
+ u8 idx; /* Index of FV entry used */
+ u8 disp; /* Displacement of field in bits fr. FV entry's start */
+ u16 mask; /* Mask for field */
+};
+
+enum ice_flow_fld_match_type {
+ ICE_FLOW_FLD_TYPE_REG, /* Value, mask */
+ ICE_FLOW_FLD_TYPE_RANGE, /* Value, mask, last (upper bound) */
+ ICE_FLOW_FLD_TYPE_PREFIX, /* IP address, prefix, size of prefix */
+ ICE_FLOW_FLD_TYPE_SIZE, /* Value, mask, size of match */
+};
+
+struct ice_flow_fld_loc {
+ /* Describe offsets of field information relative to the beginning of
+ * input buffer provided when adding flow entries.
+ */
+ u16 val; /* Offset where the value is located */
+ u16 mask; /* Offset where the mask/prefix value is located */
+ u16 last; /* Length or offset where the upper value is located */
+};
+
+struct ice_flow_fld_info {
+ enum ice_flow_fld_match_type type;
+ /* Location where to retrieve data from an input buffer */
+ struct ice_flow_fld_loc src;
+ /* Location where to put the data into the final entry buffer */
+ struct ice_flow_fld_loc entry;
+ struct ice_flow_seg_xtrct xtrct;
+};
+
+struct ice_flow_seg_fld_raw {
+ struct ice_flow_fld_info info;
+ u16 off; /* Offset from the start of the segment */
+};
+
+struct ice_flow_seg_info {
+ u32 hdrs; /* Bitmask indicating protocol headers present */
+ u64 match; /* Bitmask indicating header fields to be matched */
+ u64 range; /* Bitmask indicating header fields matched as ranges */
+
+ struct ice_flow_fld_info fields[ICE_FLOW_FIELD_IDX_MAX];
+
+ u8 raws_cnt; /* Number of raw fields to be matched */
+ struct ice_flow_seg_fld_raw raws[ICE_FLOW_SEG_RAW_FLD_MAX];
+};
+
+/* This structure describes a flow entry, and is tracked only in this file */
+struct ice_flow_entry {
+ struct list_head l_entry;
+
+ u64 id;
+ struct ice_flow_prof *prof;
+ /* Flow entry's content */
+ void *entry;
+ enum ice_flow_priority priority;
+ u16 vsi_handle;
+ u16 entry_sz;
+};
+
+#define ICE_FLOW_ENTRY_HNDL(e) ((u64)(uintptr_t)e)
+#define ICE_FLOW_ENTRY_PTR(h) ((struct ice_flow_entry *)(uintptr_t)(h))
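+
+/* Illustrative note: a flow entry handle is simply the entry pointer cast to
+ * u64, so ICE_FLOW_ENTRY_PTR(ICE_FLOW_ENTRY_HNDL(e)) == e, and the invalid
+ * handle value ICE_FLOW_ENTRY_HANDLE_INVAL (0) corresponds to a NULL entry
+ * pointer.
+ */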
+
+struct ice_flow_prof {
+ struct list_head l_entry;
+
+ u64 id;
+ enum ice_flow_dir dir;
+ u8 segs_cnt;
+
+ /* Keep track of flow entries associated with this flow profile */
+ struct mutex entries_lock;
+ struct list_head entries;
+
+ struct ice_flow_seg_info segs[ICE_FLOW_SEG_MAX];
+
+ /* software VSI handles referenced by this flow profile */
+ DECLARE_BITMAP(vsis, ICE_MAX_VSI);
+};
+
+struct ice_rss_cfg {
+ struct list_head l_entry;
+ /* bitmap of VSIs added to the RSS entry */
+ DECLARE_BITMAP(vsis, ICE_MAX_VSI);
+ u64 hashed_flds;
+ u32 packet_hdr;
+};
+
+int
+ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir,
+ u64 prof_id, struct ice_flow_seg_info *segs, u8 segs_cnt,
+ struct ice_flow_prof **prof);
+int ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id);
+int
+ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id,
+		   u64 entry_id, u16 vsi_handle, enum ice_flow_priority prio,
+ void *data, u64 *entry_h);
+int ice_flow_rem_entry(struct ice_hw *hw, enum ice_block blk, u64 entry_h);
+void
+ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
+ u16 val_loc, u16 mask_loc, u16 last_loc, bool range);
+void
+ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
+ u16 val_loc, u16 mask_loc);
+int ice_flow_rem_vsi_prof(struct ice_hw *hw, u16 vsi_handle, u64 prof_id);
+void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle);
+int ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle);
+int ice_add_avf_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds);
+int ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle);
+int
+ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+ u32 addl_hdrs);
+int
+ice_rem_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+ u32 addl_hdrs);
+u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs);
+#endif /* _ICE_FLOW_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c
new file mode 100644
index 000000000..40e678cfb
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.c
@@ -0,0 +1,476 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_fltr.h"
+
+/**
+ * ice_fltr_free_list - free filter lists helper
+ * @dev: pointer to the device struct
+ * @h: pointer to the list head to be freed
+ *
+ * Helper function to free filter lists previously created using
+ * ice_fltr_add_mac_to_list
+ */
+void ice_fltr_free_list(struct device *dev, struct list_head *h)
+{
+ struct ice_fltr_list_entry *e, *tmp;
+
+ list_for_each_entry_safe(e, tmp, h, list_entry) {
+ list_del(&e->list_entry);
+ devm_kfree(dev, e);
+ }
+}
+
+/**
+ * ice_fltr_add_entry_to_list - allocate and add filter entry to list
+ * @dev: pointer to device needed by alloc function
+ * @info: filter info struct that gets added to the passed in list
+ * @list: pointer to the list which contains MAC filters entry
+ */
+static int
+ice_fltr_add_entry_to_list(struct device *dev, struct ice_fltr_info *info,
+ struct list_head *list)
+{
+ struct ice_fltr_list_entry *entry;
+
+ entry = devm_kzalloc(dev, sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->fltr_info = *info;
+
+ INIT_LIST_HEAD(&entry->list_entry);
+ list_add(&entry->list_entry, list);
+
+ return 0;
+}
+
+/**
+ * ice_fltr_set_vlan_vsi_promisc - set VLAN promiscuous mode(s) for given VSI
+ * @hw: pointer to the hardware structure
+ * @vsi: the VSI being configured
+ * @promisc_mask: mask of promiscuous config bits
+ *
+ * Set VSI with all associated VLANs to given promiscuous mode(s)
+ */
+int
+ice_fltr_set_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
+ u8 promisc_mask)
+{
+ struct ice_pf *pf = hw->back;
+ int result;
+
+ result = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, false);
+ if (result && result != -EEXIST)
+ dev_err(ice_pf_to_dev(pf),
+ "Error setting promisc mode on VSI %i (rc=%d)\n",
+ vsi->vsi_num, result);
+
+ return result;
+}
+
+/**
+ * ice_fltr_clear_vlan_vsi_promisc - clear VLAN promiscuous mode(s) for given VSI
+ * @hw: pointer to the hardware structure
+ * @vsi: the VSI being configured
+ * @promisc_mask: mask of promiscuous config bits
+ *
+ * Clear VSI with all associated VLANs to given promiscuous mode(s)
+ */
+int
+ice_fltr_clear_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
+ u8 promisc_mask)
+{
+ struct ice_pf *pf = hw->back;
+ int result;
+
+ result = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, true);
+ if (result && result != -EEXIST)
+ dev_err(ice_pf_to_dev(pf),
+ "Error clearing promisc mode on VSI %i (rc=%d)\n",
+ vsi->vsi_num, result);
+
+ return result;
+}
+
+/**
+ * ice_fltr_clear_vsi_promisc - clear specified promiscuous mode(s)
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to clear mode
+ * @promisc_mask: mask of promiscuous config bits to clear
+ * @vid: VLAN ID to clear VLAN promiscuous
+ */
+int
+ice_fltr_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+ u16 vid)
+{
+ struct ice_pf *pf = hw->back;
+ int result;
+
+ result = ice_clear_vsi_promisc(hw, vsi_handle, promisc_mask, vid);
+ if (result && result != -EEXIST)
+ dev_err(ice_pf_to_dev(pf),
+ "Error clearing promisc mode on VSI %i for VID %u (rc=%d)\n",
+ ice_get_hw_vsi_num(hw, vsi_handle), vid, result);
+
+ return result;
+}
+
+/**
+ * ice_fltr_set_vsi_promisc - set given VSI to given promiscuous mode(s)
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to configure
+ * @promisc_mask: mask of promiscuous config bits
+ * @vid: VLAN ID to set VLAN promiscuous
+ */
+int
+ice_fltr_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+ u16 vid)
+{
+ struct ice_pf *pf = hw->back;
+ int result;
+
+ result = ice_set_vsi_promisc(hw, vsi_handle, promisc_mask, vid);
+ if (result && result != -EEXIST)
+ dev_err(ice_pf_to_dev(pf),
+ "Error setting promisc mode on VSI %i for VID %u (rc=%d)\n",
+ ice_get_hw_vsi_num(hw, vsi_handle), vid, result);
+
+ return result;
+}
+
+/**
+ * ice_fltr_add_mac_list - add list of MAC filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+int ice_fltr_add_mac_list(struct ice_vsi *vsi, struct list_head *list)
+{
+ return ice_add_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_mac_list - remove list of MAC filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+int ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list)
+{
+ return ice_remove_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_add_vlan_list - add list of VLAN filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static int ice_fltr_add_vlan_list(struct ice_vsi *vsi, struct list_head *list)
+{
+ return ice_add_vlan(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_vlan_list - remove list of VLAN filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static int
+ice_fltr_remove_vlan_list(struct ice_vsi *vsi, struct list_head *list)
+{
+ return ice_remove_vlan(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_add_eth_list - add list of ethertype filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static int ice_fltr_add_eth_list(struct ice_vsi *vsi, struct list_head *list)
+{
+ return ice_add_eth_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_eth_list - remove list of ethertype filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static int ice_fltr_remove_eth_list(struct ice_vsi *vsi, struct list_head *list)
+{
+ return ice_remove_eth_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_all - remove all filters associated with VSI
+ * @vsi: pointer to VSI struct
+ */
+void ice_fltr_remove_all(struct ice_vsi *vsi)
+{
+ ice_remove_vsi_fltr(&vsi->back->hw, vsi->idx);
+}
+
+/**
+ * ice_fltr_add_mac_to_list - add MAC filter info to existing list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @mac: MAC address to add
+ * @action: filter action
+ */
+int
+ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list,
+ const u8 *mac, enum ice_sw_fwd_act_type action)
+{
+ struct ice_fltr_info info = { 0 };
+
+ info.flag = ICE_FLTR_TX;
+ info.src_id = ICE_SRC_ID_VSI;
+ info.lkup_type = ICE_SW_LKUP_MAC;
+ info.fltr_act = action;
+ info.vsi_handle = vsi->idx;
+
+ ether_addr_copy(info.l_data.mac.mac_addr, mac);
+
+ return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+ list);
+}
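+
+/* Illustrative usage sketch (not part of the upstream driver): build a
+ * temporary list with one MAC filter and program it; this mirrors what the
+ * ice_fltr_prepare_mac() helper below does. The function name is
+ * hypothetical.
+ */
+static int __maybe_unused
+ice_fltr_example_add_one_mac(struct ice_vsi *vsi, const u8 *mac)
+{
+	LIST_HEAD(tmp_list);
+	int err;
+
+	err = ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, ICE_FWD_TO_VSI);
+	if (err)
+		return err;
+
+	err = ice_fltr_add_mac_list(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return err;
+}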
+
+/**
+ * ice_fltr_add_vlan_to_list - add VLAN filter info to existing list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @vlan: VLAN filter details
+ */
+static int
+ice_fltr_add_vlan_to_list(struct ice_vsi *vsi, struct list_head *list,
+ struct ice_vlan *vlan)
+{
+ struct ice_fltr_info info = { 0 };
+
+ info.flag = ICE_FLTR_TX;
+ info.src_id = ICE_SRC_ID_VSI;
+ info.lkup_type = ICE_SW_LKUP_VLAN;
+ info.fltr_act = ICE_FWD_TO_VSI;
+ info.vsi_handle = vsi->idx;
+ info.l_data.vlan.vlan_id = vlan->vid;
+ info.l_data.vlan.tpid = vlan->tpid;
+ info.l_data.vlan.tpid_valid = true;
+
+ return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+ list);
+}
+
+/**
+ * ice_fltr_add_eth_to_list - add ethertype filter info to existing list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @ethertype: ethertype of packet that matches filter
+ * @flag: filter direction, Tx or Rx
+ * @action: filter action
+ */
+static int
+ice_fltr_add_eth_to_list(struct ice_vsi *vsi, struct list_head *list,
+ u16 ethertype, u16 flag,
+ enum ice_sw_fwd_act_type action)
+{
+ struct ice_fltr_info info = { 0 };
+
+ info.flag = flag;
+ info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
+ info.fltr_act = action;
+ info.vsi_handle = vsi->idx;
+ info.l_data.ethertype_mac.ethertype = ethertype;
+
+ if (flag == ICE_FLTR_TX)
+ info.src_id = ICE_SRC_ID_VSI;
+ else
+ info.src_id = ICE_SRC_ID_LPORT;
+
+ return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+ list);
+}
+
+/**
+ * ice_fltr_prepare_mac - add or remove MAC rule
+ * @vsi: pointer to VSI struct
+ * @mac: MAC address to add
+ * @action: action to be performed on filter match
+ * @mac_action: pointer to add or remove MAC function
+ */
+static int
+ice_fltr_prepare_mac(struct ice_vsi *vsi, const u8 *mac,
+ enum ice_sw_fwd_act_type action,
+ int (*mac_action)(struct ice_vsi *, struct list_head *))
+{
+ LIST_HEAD(tmp_list);
+ int result;
+
+ if (ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, action)) {
+ ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+ return -ENOMEM;
+ }
+
+ result = mac_action(vsi, &tmp_list);
+ ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+ return result;
+}
+
+/**
+ * ice_fltr_prepare_mac_and_broadcast - add or remove MAC and broadcast filter
+ * @vsi: pointer to VSI struct
+ * @mac: MAC address to add
+ * @action: action to be performed on filter match
+ * @mac_action: pointer to add or remove MAC function
+ */
+static int
+ice_fltr_prepare_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+ enum ice_sw_fwd_act_type action,
+ int(*mac_action)
+ (struct ice_vsi *, struct list_head *))
+{
+ u8 broadcast[ETH_ALEN];
+ LIST_HEAD(tmp_list);
+ int result;
+
+ eth_broadcast_addr(broadcast);
+ if (ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, action) ||
+ ice_fltr_add_mac_to_list(vsi, &tmp_list, broadcast, action)) {
+ ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+ return -ENOMEM;
+ }
+
+ result = mac_action(vsi, &tmp_list);
+ ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+ return result;
+}
+
+/**
+ * ice_fltr_prepare_vlan - add or remove VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan: VLAN filter details
+ * @vlan_action: pointer to add or remove VLAN function
+ */
+static int
+ice_fltr_prepare_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan,
+ int (*vlan_action)(struct ice_vsi *, struct list_head *))
+{
+ LIST_HEAD(tmp_list);
+ int result;
+
+ if (ice_fltr_add_vlan_to_list(vsi, &tmp_list, vlan))
+ return -ENOMEM;
+
+ result = vlan_action(vsi, &tmp_list);
+ ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+ return result;
+}
+
+/**
+ * ice_fltr_prepare_eth - add or remove ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of packet to be filtered
+ * @flag: direction of packet, Tx or Rx
+ * @action: action to be performed on filter match
+ * @eth_action: pointer to add or remove ethertype function
+ */
+static int
+ice_fltr_prepare_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+ enum ice_sw_fwd_act_type action,
+ int (*eth_action)(struct ice_vsi *, struct list_head *))
+{
+ LIST_HEAD(tmp_list);
+ int result;
+
+ if (ice_fltr_add_eth_to_list(vsi, &tmp_list, ethertype, flag, action))
+ return -ENOMEM;
+
+ result = eth_action(vsi, &tmp_list);
+ ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+ return result;
+}
+
+/**
+ * ice_fltr_add_mac - add single MAC filter
+ * @vsi: pointer to VSI struct
+ * @mac: MAC to add
+ * @action: action to be performed on filter match
+ */
+int ice_fltr_add_mac(struct ice_vsi *vsi, const u8 *mac,
+ enum ice_sw_fwd_act_type action)
+{
+ return ice_fltr_prepare_mac(vsi, mac, action, ice_fltr_add_mac_list);
+}
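+/* Illustrative usage sketch (not part of the driver): a hypothetical caller
+ * installing a unicast MAC filter that forwards matching frames to the VSI.
+ * Taking the address from a netdev is an assumption for the example only.
+ *
+ * err = ice_fltr_add_mac(vsi, netdev->dev_addr, ICE_FWD_TO_VSI);
+ * if (err)
+ *	dev_err(ice_pf_to_dev(vsi->back),
+ *		"could not add MAC filter, err %d\n", err);
+ */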
+
+/**
+ * ice_fltr_add_mac_and_broadcast - add single MAC and broadcast
+ * @vsi: pointer to VSI struct
+ * @mac: MAC to add
+ * @action: action to be performed on filter match
+ */
+int
+ice_fltr_add_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+ enum ice_sw_fwd_act_type action)
+{
+ return ice_fltr_prepare_mac_and_broadcast(vsi, mac, action,
+ ice_fltr_add_mac_list);
+}
+
+/**
+ * ice_fltr_remove_mac - remove MAC filter
+ * @vsi: pointer to VSI struct
+ * @mac: filter MAC to remove
+ * @action: action to remove
+ */
+int ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac,
+ enum ice_sw_fwd_act_type action)
+{
+ return ice_fltr_prepare_mac(vsi, mac, action, ice_fltr_remove_mac_list);
+}
+
+/**
+ * ice_fltr_add_vlan - add single VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan: VLAN filter details
+ */
+int ice_fltr_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ return ice_fltr_prepare_vlan(vsi, vlan, ice_fltr_add_vlan_list);
+}
+
+/**
+ * ice_fltr_remove_vlan - remove VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan: VLAN filter details
+ */
+int ice_fltr_remove_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ return ice_fltr_prepare_vlan(vsi, vlan, ice_fltr_remove_vlan_list);
+}
+
+/**
+ * ice_fltr_add_eth - add specific ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of filter
+ * @flag: direction of packet to be filtered, Tx or Rx
+ * @action: action to be performed on filter match
+ */
+int ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+ enum ice_sw_fwd_act_type action)
+{
+ return ice_fltr_prepare_eth(vsi, ethertype, flag, action,
+ ice_fltr_add_eth_list);
+}
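+/* Illustrative sketch: with firmware-managed LLDP, a caller can use this
+ * helper to drop LLDP frames on transmit. ETH_P_LLDP (0x88CC) is the
+ * standard LLDP ethertype; the drop action is an assumption here.
+ *
+ * ice_fltr_add_eth(vsi, ETH_P_LLDP, ICE_FLTR_TX, ICE_DROP_PACKET);
+ */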
+
+/**
+ * ice_fltr_remove_eth - remove ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of filter
+ * @flag: direction of filter
+ * @action: action to remove
+ */
+int ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+ enum ice_sw_fwd_act_type action)
+{
+ return ice_fltr_prepare_eth(vsi, ethertype, flag, action,
+ ice_fltr_remove_eth_list);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h
new file mode 100644
index 000000000..0f3dbc308
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_FLTR_H_
+#define _ICE_FLTR_H_
+
+#include "ice_vlan.h"
+
+void ice_fltr_free_list(struct device *dev, struct list_head *h);
+int
+ice_fltr_set_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
+ u8 promisc_mask);
+int
+ice_fltr_clear_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
+ u8 promisc_mask);
+int
+ice_fltr_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+ u16 vid);
+int
+ice_fltr_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+ u16 vid);
+int
+ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list,
+ const u8 *mac, enum ice_sw_fwd_act_type action);
+int
+ice_fltr_add_mac(struct ice_vsi *vsi, const u8 *mac,
+ enum ice_sw_fwd_act_type action);
+int
+ice_fltr_add_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+ enum ice_sw_fwd_act_type action);
+int ice_fltr_add_mac_list(struct ice_vsi *vsi, struct list_head *list);
+int
+ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac,
+ enum ice_sw_fwd_act_type action);
+int ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list);
+
+int ice_fltr_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+int ice_fltr_remove_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+
+int
+ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+ enum ice_sw_fwd_act_type action);
+int
+ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+ enum ice_sw_fwd_act_type action);
+void ice_fltr_remove_all(struct ice_vsi *vsi);
+
+int
+ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id,
+ u32 new_flags);
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c
new file mode 100644
index 000000000..3dc5662d6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c
@@ -0,0 +1,1051 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2019, Intel Corporation. */
+
+#include <asm/unaligned.h>
+#include <linux/uuid.h>
+#include <linux/crc32.h>
+#include <linux/pldmfw.h>
+#include "ice.h"
+#include "ice_fw_update.h"
+
+struct ice_fwu_priv {
+ struct pldmfw context;
+
+ struct ice_pf *pf;
+ struct netlink_ext_ack *extack;
+
+ /* Track which NVM banks to activate at the end of the update */
+ u8 activate_flags;
+
+ /* Track the firmware response of the required reset to complete the
+ * flash update.
+ *
+ * 0 - ICE_AQC_NVM_POR_FLAG - A full power on is required
+ * 1 - ICE_AQC_NVM_PERST_FLAG - A cold PCIe reset is required
+ * 2 - ICE_AQC_NVM_EMPR_FLAG - An EMP reset is required
+ */
+ u8 reset_level;
+
+ /* Track if EMP reset is available */
+ u8 emp_reset_available;
+};
+
+/**
+ * ice_send_package_data - Send record package data to firmware
+ * @context: PLDM fw update structure
+ * @data: pointer to the package data
+ * @length: length of the package data
+ *
+ * Send a copy of the package data associated with the PLDM record matching
+ * this device to the firmware.
+ *
+ * Note that this function sends an AdminQ command that will fail unless the
+ * NVM resource has been acquired.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_send_package_data(struct pldmfw *context, const u8 *data, u16 length)
+{
+ struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+ struct netlink_ext_ack *extack = priv->extack;
+ struct device *dev = context->dev;
+ struct ice_pf *pf = priv->pf;
+ struct ice_hw *hw = &pf->hw;
+ u8 *package_data;
+ int status;
+
+ dev_dbg(dev, "Sending PLDM record package data to firmware\n");
+
+ package_data = kmemdup(data, length, GFP_KERNEL);
+ if (!package_data)
+ return -ENOMEM;
+
+ status = ice_nvm_set_pkg_data(hw, false, package_data, length, NULL);
+
+ kfree(package_data);
+
+ if (status) {
+ dev_err(dev, "Failed to send record package data to firmware, err %d aq_err %s\n",
+ status, ice_aq_str(hw->adminq.sq_last_status));
+ NL_SET_ERR_MSG_MOD(extack, "Failed to record package data to firmware");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_check_component_response - Report firmware response to a component
+ * @pf: device private data structure
+ * @id: component id being checked
+ * @response: indicates whether this component can be updated
+ * @code: code indicating reason for response
+ * @extack: netlink extended ACK structure
+ *
+ * Check whether firmware indicates if this component can be updated. Report
+ * a suitable error message over the netlink extended ACK if the component
+ * cannot be updated.
+ *
+ * Returns: zero if the component can be updated, or -ECANCELED if the
+ * firmware indicates the component cannot be updated.
+ */
+static int
+ice_check_component_response(struct ice_pf *pf, u16 id, u8 response, u8 code,
+ struct netlink_ext_ack *extack)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ const char *component;
+
+ switch (id) {
+ case NVM_COMP_ID_OROM:
+ component = "fw.undi";
+ break;
+ case NVM_COMP_ID_NVM:
+ component = "fw.mgmt";
+ break;
+ case NVM_COMP_ID_NETLIST:
+ component = "fw.netlist";
+ break;
+ default:
+ WARN(1, "Unexpected unknown component identifier 0x%02x", id);
+ return -EINVAL;
+ }
+
+ dev_dbg(dev, "%s: firmware response 0x%x, code 0x%x\n",
+ component, response, code);
+
+ switch (response) {
+ case ICE_AQ_NVM_PASS_COMP_CAN_BE_UPDATED:
+ /* firmware indicated this update is good to proceed */
+ return 0;
+ case ICE_AQ_NVM_PASS_COMP_CAN_MAY_BE_UPDATEABLE:
+ dev_warn(dev, "firmware recommends not updating %s, as it may result in a downgrade. continuing anyways\n", component);
+ return 0;
+ case ICE_AQ_NVM_PASS_COMP_CAN_NOT_BE_UPDATED:
+ dev_info(dev, "firmware has rejected updating %s\n", component);
+ break;
+ }
+
+ switch (code) {
+ case ICE_AQ_NVM_PASS_COMP_STAMP_IDENTICAL_CODE:
+ dev_err(dev, "Component comparison stamp for %s is identical to the running image\n",
+ component);
+ NL_SET_ERR_MSG_MOD(extack, "Component comparison stamp is identical to running image");
+ break;
+ case ICE_AQ_NVM_PASS_COMP_STAMP_LOWER:
+ dev_err(dev, "Component comparison stamp for %s is lower than the running image\n",
+ component);
+ NL_SET_ERR_MSG_MOD(extack, "Component comparison stamp is lower than running image");
+ break;
+ case ICE_AQ_NVM_PASS_COMP_INVALID_STAMP_CODE:
+ dev_err(dev, "Component comparison stamp for %s is invalid\n",
+ component);
+ NL_SET_ERR_MSG_MOD(extack, "Component comparison stamp is invalid");
+ break;
+ case ICE_AQ_NVM_PASS_COMP_CONFLICT_CODE:
+ dev_err(dev, "%s conflicts with a previous component table\n",
+ component);
+ NL_SET_ERR_MSG_MOD(extack, "Component table conflict occurred");
+ break;
+ case ICE_AQ_NVM_PASS_COMP_PRE_REQ_NOT_MET_CODE:
+ dev_err(dev, "Pre-requisites for component %s have not been met\n",
+ component);
+ NL_SET_ERR_MSG_MOD(extack, "Component pre-requisites not met");
+ break;
+ case ICE_AQ_NVM_PASS_COMP_NOT_SUPPORTED_CODE:
+ dev_err(dev, "%s is not a supported component\n",
+ component);
+ NL_SET_ERR_MSG_MOD(extack, "Component not supported");
+ break;
+ case ICE_AQ_NVM_PASS_COMP_CANNOT_DOWNGRADE_CODE:
+ dev_err(dev, "Security restrictions prevent %s from being downgraded\n",
+ component);
+ NL_SET_ERR_MSG_MOD(extack, "Component cannot be downgraded");
+ break;
+ case ICE_AQ_NVM_PASS_COMP_INCOMPLETE_IMAGE_CODE:
+ dev_err(dev, "Received an incomplete component image for %s\n",
+ component);
+ NL_SET_ERR_MSG_MOD(extack, "Incomplete component image");
+ break;
+ case ICE_AQ_NVM_PASS_COMP_VER_STR_IDENTICAL_CODE:
+ dev_err(dev, "Component version for %s is identical to the running image\n",
+ component);
+ NL_SET_ERR_MSG_MOD(extack, "Component version is identical to running image");
+ break;
+ case ICE_AQ_NVM_PASS_COMP_VER_STR_LOWER_CODE:
+ dev_err(dev, "Component version for %s is lower than the running image\n",
+ component);
+ NL_SET_ERR_MSG_MOD(extack, "Component version is lower than the running image");
+ break;
+ default:
+ dev_err(dev, "Unexpected response code 0x02%x for %s\n",
+ code, component);
+ NL_SET_ERR_MSG_MOD(extack, "Received unexpected response code from firmware");
+ break;
+ }
+
+ return -ECANCELED;
+}
+
+/**
+ * ice_send_component_table - Send PLDM component table to firmware
+ * @context: PLDM fw update structure
+ * @component: the component to process
+ * @transfer_flag: relative transfer order of this component
+ *
+ * Read relevant data from the component and forward it to the device
+ * firmware. Check the response to determine if the firmware indicates that
+ * the update can proceed.
+ *
+ * This function sends AdminQ commands related to the NVM, and assumes that
+ * the NVM resource has been acquired.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_send_component_table(struct pldmfw *context, struct pldmfw_component *component,
+ u8 transfer_flag)
+{
+ struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+ struct netlink_ext_ack *extack = priv->extack;
+ struct ice_aqc_nvm_comp_tbl *comp_tbl;
+ u8 comp_response, comp_response_code;
+ struct device *dev = context->dev;
+ struct ice_pf *pf = priv->pf;
+ struct ice_hw *hw = &pf->hw;
+ size_t length;
+ int status;
+
+ switch (component->identifier) {
+ case NVM_COMP_ID_OROM:
+ case NVM_COMP_ID_NVM:
+ case NVM_COMP_ID_NETLIST:
+ break;
+ default:
+ dev_err(dev, "Unable to update due to a firmware component with unknown ID %u\n",
+ component->identifier);
+ NL_SET_ERR_MSG_MOD(extack, "Unable to update due to unknown firmware component");
+ return -EOPNOTSUPP;
+ }
+
+ length = struct_size(comp_tbl, cvs, component->version_len);
+ comp_tbl = kzalloc(length, GFP_KERNEL);
+ if (!comp_tbl)
+ return -ENOMEM;
+
+ comp_tbl->comp_class = cpu_to_le16(component->classification);
+ comp_tbl->comp_id = cpu_to_le16(component->identifier);
+ comp_tbl->comp_class_idx = FWU_COMP_CLASS_IDX_NOT_USE;
+ comp_tbl->comp_cmp_stamp = cpu_to_le32(component->comparison_stamp);
+ comp_tbl->cvs_type = component->version_type;
+ comp_tbl->cvs_len = component->version_len;
+ memcpy(comp_tbl->cvs, component->version_string, component->version_len);
+
+ dev_dbg(dev, "Sending component table to firmware:\n");
+
+ status = ice_nvm_pass_component_tbl(hw, (u8 *)comp_tbl, length,
+ transfer_flag, &comp_response,
+ &comp_response_code, NULL);
+
+ kfree(comp_tbl);
+
+ if (status) {
+ dev_err(dev, "Failed to transfer component table to firmware, err %d aq_err %s\n",
+ status, ice_aq_str(hw->adminq.sq_last_status));
+ NL_SET_ERR_MSG_MOD(extack, "Failed to transfer component table to firmware");
+ return -EIO;
+ }
+
+ return ice_check_component_response(pf, component->identifier, comp_response,
+ comp_response_code, extack);
+}
+
+/**
+ * ice_write_one_nvm_block - Write an NVM block and await completion response
+ * @pf: the PF data structure
+ * @module: the module to write to
+ * @offset: offset in bytes
+ * @block_size: size of the block to write, up to 4k
+ * @block: pointer to block of data to write
+ * @last_cmd: whether this is the last command
+ * @reset_level: storage for reset level required
+ * @extack: netlink extended ACK structure
+ *
+ * Write a block of data to a flash module, and wait for the completion
+ * response message from firmware.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * On successful return, reset level indicates the device reset required to
+ * complete the update.
+ *
+ * 0 - ICE_AQC_NVM_POR_FLAG - A full power on is required
+ * 1 - ICE_AQC_NVM_PERST_FLAG - A cold PCIe reset is required
+ * 2 - ICE_AQC_NVM_EMPR_FLAG - An EMP reset is required
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset,
+ u16 block_size, u8 *block, bool last_cmd,
+ u8 *reset_level, struct netlink_ext_ack *extack)
+{
+ u16 completion_module, completion_retval;
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_rq_event_info event;
+ struct ice_hw *hw = &pf->hw;
+ u32 completion_offset;
+ int err;
+
+ memset(&event, 0, sizeof(event));
+
+ dev_dbg(dev, "Writing block of %u bytes for module 0x%02x at offset %u\n",
+ block_size, module, offset);
+
+ err = ice_aq_update_nvm(hw, module, offset, block_size, block,
+ last_cmd, 0, NULL);
+ if (err) {
+ dev_err(dev, "Failed to flash module 0x%02x with block of size %u at offset %u, err %d aq_err %s\n",
+ module, block_size, offset, err,
+ ice_aq_str(hw->adminq.sq_last_status));
+ NL_SET_ERR_MSG_MOD(extack, "Failed to program flash module");
+ return -EIO;
+ }
+
+ /* In most cases, firmware reports a write completion within a few
+ * milliseconds. However, a completion has been observed to take more
+ * than a second in some cases. The timeout here
+ * is conservative and is intended to prevent failure to update when
+ * firmware is slow to respond.
+ */
+ err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, 15 * HZ, &event);
+ if (err) {
+ dev_err(dev, "Timed out while trying to flash module 0x%02x with block of size %u at offset %u, err %d\n",
+ module, block_size, offset, err);
+ NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
+ return -EIO;
+ }
+
+ completion_module = le16_to_cpu(event.desc.params.nvm.module_typeid);
+ completion_retval = le16_to_cpu(event.desc.retval);
+
+ completion_offset = le16_to_cpu(event.desc.params.nvm.offset_low);
+ completion_offset |= event.desc.params.nvm.offset_high << 16;
+
+ if (completion_module != module) {
+ dev_err(dev, "Unexpected module_typeid in write completion: got 0x%x, expected 0x%x\n",
+ completion_module, module);
+ NL_SET_ERR_MSG_MOD(extack, "Unexpected firmware response");
+ return -EIO;
+ }
+
+ if (completion_offset != offset) {
+ dev_err(dev, "Unexpected offset in write completion: got %u, expected %u\n",
+ completion_offset, offset);
+ NL_SET_ERR_MSG_MOD(extack, "Unexpected firmware response");
+ return -EIO;
+ }
+
+ if (completion_retval) {
+ dev_err(dev, "Firmware failed to flash module 0x%02x with block of size %u at offset %u, err %s\n",
+ module, block_size, offset,
+ ice_aq_str((enum ice_aq_err)completion_retval));
+ NL_SET_ERR_MSG_MOD(extack, "Firmware failed to program flash module");
+ return -EIO;
+ }
+
+ /* For the last command to write the NVM bank, newer versions of
+ * firmware indicate the required level of reset to complete
+ * activation of firmware. If the firmware supports this, cache the
+ * response for indicating to the user later. Otherwise, assume that
+ * a full power cycle is required.
+ */
+ if (reset_level && last_cmd && module == ICE_SR_1ST_NVM_BANK_PTR) {
+ if (hw->dev_caps.common_cap.pcie_reset_avoidance) {
+ *reset_level = (event.desc.params.nvm.cmd_flags &
+ ICE_AQC_NVM_RESET_LVL_M);
+ dev_dbg(dev, "Firmware reported required reset level as %u\n",
+ *reset_level);
+ } else {
+ *reset_level = ICE_AQC_NVM_POR_FLAG;
+ dev_dbg(dev, "Firmware doesn't support indicating required reset level. Assuming a power cycle is required\n");
+ }
+ }
+
+ return 0;
+}
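+/* Worked example of the offset check above: a completion event carrying
+ * offset_low 0x1000 and offset_high 0x2 reconstructs to completion_offset
+ * 0x21000 (0x1000 | 0x2 << 16).
+ */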
+
+/**
+ * ice_write_nvm_module - Write data to an NVM module
+ * @pf: the PF driver structure
+ * @module: the module id to program
+ * @component: the name of the component being updated
+ * @image: buffer of image data to write to the NVM
+ * @length: length of the buffer
+ * @reset_level: storage for reset level required
+ * @extack: netlink extended ACK structure
+ *
+ * Loop over the data for a given NVM module and program it in 4 KB
+ * blocks. Notify devlink core of progress after each block is programmed.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_write_nvm_module(struct ice_pf *pf, u16 module, const char *component,
+ const u8 *image, u32 length, u8 *reset_level,
+ struct netlink_ext_ack *extack)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct devlink *devlink;
+ u32 offset = 0;
+ bool last_cmd;
+ u8 *block;
+ int err;
+
+ dev_dbg(dev, "Beginning write of flash component '%s', module 0x%02x\n", component, module);
+
+ devlink = priv_to_devlink(pf);
+
+ devlink_flash_update_status_notify(devlink, "Flashing",
+ component, 0, length);
+
+ block = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!block)
+ return -ENOMEM;
+
+ do {
+ u32 block_size;
+
+ block_size = min_t(u32, ICE_AQ_MAX_BUF_LEN, length - offset);
+ last_cmd = !(offset + block_size < length);
+
+ /* ice_aq_update_nvm may copy the firmware response into the
+ * buffer, so we must make a copy since the source data is
+ * constant.
+ */
+ memcpy(block, image + offset, block_size);
+
+ err = ice_write_one_nvm_block(pf, module, offset, block_size,
+ block, last_cmd, reset_level,
+ extack);
+ if (err)
+ break;
+
+ offset += block_size;
+
+ devlink_flash_update_status_notify(devlink, "Flashing",
+ component, offset, length);
+ } while (!last_cmd);
+
+ dev_dbg(dev, "Completed write of flash component '%s', module 0x%02x\n", component, module);
+
+ if (err)
+ devlink_flash_update_status_notify(devlink, "Flashing failed",
+ component, length, length);
+ else
+ devlink_flash_update_status_notify(devlink, "Flashing done",
+ component, length, length);
+
+ kfree(block);
+ return err;
+}
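+/* Worked example of the block loop above, assuming ICE_AQ_MAX_BUF_LEN is
+ * 4096 bytes (the AdminQ buffer limit): a 10000 byte image is written as
+ * three blocks of 4096, 4096 and 1808 bytes, and last_cmd becomes true
+ * only on the third write, once offset + block_size reaches length.
+ */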
+
+/* Length in seconds to wait before timing out when erasing a flash module.
+ * Yes, erasing really can take minutes to complete.
+ */
+#define ICE_FW_ERASE_TIMEOUT 300
+
+/**
+ * ice_erase_nvm_module - Erase an NVM module and await firmware completion
+ * @pf: the PF data structure
+ * @module: the module to erase
+ * @component: name of the component being updated
+ * @extack: netlink extended ACK structure
+ *
+ * Erase the inactive NVM bank associated with this module, and wait for
+ * a completion response message from firmware.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_erase_nvm_module(struct ice_pf *pf, u16 module, const char *component,
+ struct netlink_ext_ack *extack)
+{
+ u16 completion_module, completion_retval;
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_rq_event_info event;
+ struct ice_hw *hw = &pf->hw;
+ struct devlink *devlink;
+ int err;
+
+ dev_dbg(dev, "Beginning erase of flash component '%s', module 0x%02x\n", component, module);
+
+ memset(&event, 0, sizeof(event));
+
+ devlink = priv_to_devlink(pf);
+
+ devlink_flash_update_timeout_notify(devlink, "Erasing", component, ICE_FW_ERASE_TIMEOUT);
+
+ err = ice_aq_erase_nvm(hw, module, NULL);
+ if (err) {
+ dev_err(dev, "Failed to erase %s (module 0x%02x), err %d aq_err %s\n",
+ component, module, err,
+ ice_aq_str(hw->adminq.sq_last_status));
+ NL_SET_ERR_MSG_MOD(extack, "Failed to erase flash module");
+ err = -EIO;
+ goto out_notify_devlink;
+ }
+
+ err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_erase, ICE_FW_ERASE_TIMEOUT * HZ, &event);
+ if (err) {
+ dev_err(dev, "Timed out waiting for firmware to respond with erase completion for %s (module 0x%02x), err %d\n",
+ component, module, err);
+ NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
+ goto out_notify_devlink;
+ }
+
+ completion_module = le16_to_cpu(event.desc.params.nvm.module_typeid);
+ completion_retval = le16_to_cpu(event.desc.retval);
+
+ if (completion_module != module) {
+ dev_err(dev, "Unexpected module_typeid in erase completion for %s: got 0x%x, expected 0x%x\n",
+ component, completion_module, module);
+ NL_SET_ERR_MSG_MOD(extack, "Unexpected firmware response");
+ err = -EIO;
+ goto out_notify_devlink;
+ }
+
+ if (completion_retval) {
+ dev_err(dev, "Firmware failed to erase %s (module 0x02%x), aq_err %s\n",
+ component, module,
+ ice_aq_str((enum ice_aq_err)completion_retval));
+ NL_SET_ERR_MSG_MOD(extack, "Firmware failed to erase flash");
+ err = -EIO;
+ goto out_notify_devlink;
+ }
+
+ dev_dbg(dev, "Completed erase of flash component '%s', module 0x%02x\n", component, module);
+
+out_notify_devlink:
+ if (err)
+ devlink_flash_update_status_notify(devlink, "Erasing failed",
+ component, 0, 0);
+ else
+ devlink_flash_update_status_notify(devlink, "Erasing done",
+ component, 0, 0);
+
+ return err;
+}
+
+/**
+ * ice_switch_flash_banks - Tell firmware to switch NVM banks
+ * @pf: Pointer to the PF data structure
+ * @activate_flags: flags used for the activation command
+ * @emp_reset_available: on return, indicates if EMP reset is available
+ * @extack: netlink extended ACK structure
+ *
+ * Notify firmware to activate the newly written flash banks, and wait for the
+ * firmware response.
+ *
+ * Returns: zero on success or an error code on failure.
+ */
+static int
+ice_switch_flash_banks(struct ice_pf *pf, u8 activate_flags,
+ u8 *emp_reset_available, struct netlink_ext_ack *extack)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_rq_event_info event;
+ struct ice_hw *hw = &pf->hw;
+ u16 completion_retval;
+ u8 response_flags;
+ int err;
+
+ memset(&event, 0, sizeof(event));
+
+ err = ice_nvm_write_activate(hw, activate_flags, &response_flags);
+ if (err) {
+ dev_err(dev, "Failed to switch active flash banks, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ NL_SET_ERR_MSG_MOD(extack, "Failed to switch active flash banks");
+ return -EIO;
+ }
+
+ /* Newer versions of firmware have support to indicate whether an EMP
+ * reset to reload firmware is available. For older firmware, EMP
+ * reset is always available.
+ */
+ if (emp_reset_available) {
+ if (hw->dev_caps.common_cap.reset_restrict_support) {
+ *emp_reset_available = response_flags & ICE_AQC_NVM_EMPR_ENA;
+ dev_dbg(dev, "Firmware indicated that EMP reset is %s\n",
+ *emp_reset_available ?
+ "available" : "not available");
+ } else {
+ *emp_reset_available = ICE_AQC_NVM_EMPR_ENA;
+ dev_dbg(dev, "Firmware does not support restricting EMP reset availability\n");
+ }
+ }
+
+ err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write_activate, 30 * HZ,
+ &event);
+ if (err) {
+ dev_err(dev, "Timed out waiting for firmware to switch active flash banks, err %d\n",
+ err);
+ NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
+ return err;
+ }
+
+ completion_retval = le16_to_cpu(event.desc.retval);
+ if (completion_retval) {
+ dev_err(dev, "Firmware failed to switch active flash banks aq_err %s\n",
+ ice_aq_str((enum ice_aq_err)completion_retval));
+ NL_SET_ERR_MSG_MOD(extack, "Firmware failed to switch active flash banks");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_flash_component - Flash a component of the NVM
+ * @context: PLDM fw update structure
+ * @component: the component table to program
+ *
+ * Program the flash contents for a given component. First, determine the
+ * module id. Then, erase the secondary bank for this module. Finally, write
+ * the contents of the component to the NVM.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_flash_component(struct pldmfw *context, struct pldmfw_component *component)
+{
+ struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+ struct netlink_ext_ack *extack = priv->extack;
+ struct ice_pf *pf = priv->pf;
+ const char *name;
+ u8 *reset_level;
+ u16 module;
+ u8 flag;
+ int err;
+
+ switch (component->identifier) {
+ case NVM_COMP_ID_OROM:
+ module = ICE_SR_1ST_OROM_BANK_PTR;
+ flag = ICE_AQC_NVM_ACTIV_SEL_OROM;
+ reset_level = NULL;
+ name = "fw.undi";
+ break;
+ case NVM_COMP_ID_NVM:
+ module = ICE_SR_1ST_NVM_BANK_PTR;
+ flag = ICE_AQC_NVM_ACTIV_SEL_NVM;
+ reset_level = &priv->reset_level;
+ name = "fw.mgmt";
+ break;
+ case NVM_COMP_ID_NETLIST:
+ module = ICE_SR_NETLIST_BANK_PTR;
+ flag = ICE_AQC_NVM_ACTIV_SEL_NETLIST;
+ reset_level = NULL;
+ name = "fw.netlist";
+ break;
+ default:
+ /* This should not trigger, since we check the id before
+ * sending the component table to firmware.
+ */
+ WARN(1, "Unexpected unknown component identifier 0x%02x",
+ component->identifier);
+ return -EINVAL;
+ }
+
+ /* Mark this component for activating at the end */
+ priv->activate_flags |= flag;
+
+ err = ice_erase_nvm_module(pf, module, name, extack);
+ if (err)
+ return err;
+
+ return ice_write_nvm_module(pf, module, name, component->component_data,
+ component->component_size, reset_level,
+ extack);
+}
+
+/**
+ * ice_finalize_update - Perform last steps to complete device update
+ * @context: PLDM fw update structure
+ *
+ * Called as the last step of the update process. Complete the update by
+ * telling the firmware to switch active banks, and reporting the reset
+ * required to activate the new firmware.
+ *
+ * Returns: 0 on success, or an error code on failure.
+ */
+static int ice_finalize_update(struct pldmfw *context)
+{
+ struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+ struct netlink_ext_ack *extack = priv->extack;
+ struct ice_pf *pf = priv->pf;
+ struct devlink *devlink;
+ int err;
+
+ /* Finally, notify firmware to activate the written NVM banks */
+ err = ice_switch_flash_banks(pf, priv->activate_flags,
+ &priv->emp_reset_available, extack);
+ if (err)
+ return err;
+
+ devlink = priv_to_devlink(pf);
+
+ /* If the required reset is EMPR, but EMPR is disabled, report that
+ * a reboot is required instead.
+ */
+ if (priv->reset_level == ICE_AQC_NVM_EMPR_FLAG &&
+ !priv->emp_reset_available) {
+ dev_dbg(ice_pf_to_dev(pf), "Firmware indicated EMP reset as sufficient, but EMP reset is disabled\n");
+ priv->reset_level = ICE_AQC_NVM_PERST_FLAG;
+ }
+
+ switch (priv->reset_level) {
+ case ICE_AQC_NVM_EMPR_FLAG:
+ devlink_flash_update_status_notify(devlink,
+ "Activate new firmware by devlink reload",
+ NULL, 0, 0);
+ break;
+ case ICE_AQC_NVM_PERST_FLAG:
+ devlink_flash_update_status_notify(devlink,
+ "Activate new firmware by rebooting the system",
+ NULL, 0, 0);
+ break;
+ case ICE_AQC_NVM_POR_FLAG:
+ default:
+ devlink_flash_update_status_notify(devlink,
+ "Activate new firmware by power cycling the system",
+ NULL, 0, 0);
+ break;
+ }
+
+ pf->fw_emp_reset_disabled = !priv->emp_reset_available;
+
+ return 0;
+}
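+/* For reference: when an EMP reset suffices, the new firmware can typically
+ * be activated from userspace with a devlink reload, e.g.
+ *
+ * $ devlink dev reload pci/0000:af:00.0 action fw_activate
+ *
+ * The PCI address above is a placeholder for the device being updated.
+ */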
+
+struct ice_pldm_pci_record_id {
+ u32 vendor;
+ u32 device;
+ u32 subsystem_vendor;
+ u32 subsystem_device;
+};
+
+/**
+ * ice_op_pci_match_record - Check if a PCI device matches the record
+ * @context: PLDM fw update structure
+ * @record: list of records extracted from the PLDM image
+ *
+ * Determine if the PCI device associated with this device matches the record
+ * data provided.
+ *
+ * Searches the descriptor TLVs and extracts the relevant descriptor data into
+ * a pldm_pci_record_id. This is then compared against the PCI device ID
+ * information.
+ *
+ * Returns: true if the device matches the record, false otherwise.
+ */
+static bool
+ice_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record)
+{
+ struct pci_dev *pdev = to_pci_dev(context->dev);
+ struct ice_pldm_pci_record_id id = {
+ .vendor = PCI_ANY_ID,
+ .device = PCI_ANY_ID,
+ .subsystem_vendor = PCI_ANY_ID,
+ .subsystem_device = PCI_ANY_ID,
+ };
+ struct pldmfw_desc_tlv *desc;
+
+ list_for_each_entry(desc, &record->descs, entry) {
+ u16 value;
+ u32 *ptr;
+
+ switch (desc->type) {
+ case PLDM_DESC_ID_PCI_VENDOR_ID:
+ ptr = &id.vendor;
+ break;
+ case PLDM_DESC_ID_PCI_DEVICE_ID:
+ ptr = &id.device;
+ break;
+ case PLDM_DESC_ID_PCI_SUBVENDOR_ID:
+ ptr = &id.subsystem_vendor;
+ break;
+ case PLDM_DESC_ID_PCI_SUBDEV_ID:
+ ptr = &id.subsystem_device;
+ break;
+ default:
+ /* Skip unrelated TLVs */
+ continue;
+ }
+
+ value = get_unaligned_le16(desc->data);
+ /* A value of zero for one of the descriptors is sometimes
+ * used when the record should ignore this field when matching
+ * the device, for example if the record applies to any subsystem
+ * device or vendor.
+ */
+ if (value)
+ *ptr = value;
+ else
+ *ptr = PCI_ANY_ID;
+ }
+
+ /* the E822 device can have a generic device ID so check for that */
+ if ((id.vendor == PCI_ANY_ID || id.vendor == pdev->vendor) &&
+ (id.device == PCI_ANY_ID || id.device == pdev->device ||
+ id.device == ICE_DEV_ID_E822_SI_DFLT) &&
+ (id.subsystem_vendor == PCI_ANY_ID ||
+ id.subsystem_vendor == pdev->subsystem_vendor) &&
+ (id.subsystem_device == PCI_ANY_ID ||
+ id.subsystem_device == pdev->subsystem_device))
+ return true;
+
+ return false;
+}
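+/* Worked example of the matching rules above: a record carrying vendor
+ * 0x8086 and device 0x0000, with no subsystem descriptors, leaves device
+ * and both subsystem fields at PCI_ANY_ID, so it matches any Intel part,
+ * including an E822 reporting the generic ICE_DEV_ID_E822_SI_DFLT ID.
+ */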
+
+static const struct pldmfw_ops ice_fwu_ops_e810 = {
+ .match_record = &pldmfw_op_pci_match_record,
+ .send_package_data = &ice_send_package_data,
+ .send_component_table = &ice_send_component_table,
+ .flash_component = &ice_flash_component,
+ .finalize_update = &ice_finalize_update,
+};
+
+static const struct pldmfw_ops ice_fwu_ops_e822 = {
+ .match_record = &ice_op_pci_match_record,
+ .send_package_data = &ice_send_package_data,
+ .send_component_table = &ice_send_component_table,
+ .flash_component = &ice_flash_component,
+ .finalize_update = &ice_finalize_update,
+};
+
+/**
+ * ice_get_pending_updates - Check if the component has a pending update
+ * @pf: the PF driver structure
+ * @pending: on return, bitmap of updates pending
+ * @extack: Netlink extended ACK
+ *
+ * Check if the device has any pending updates on any flash components.
+ *
+ * Returns: zero on success, or a negative error code on failure. On
+ * success, *pending is set to the bitmap of pending updates.
+ */
+int ice_get_pending_updates(struct ice_pf *pf, u8 *pending,
+ struct netlink_ext_ack *extack)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw_dev_caps *dev_caps;
+ struct ice_hw *hw = &pf->hw;
+ int err;
+
+ dev_caps = kzalloc(sizeof(*dev_caps), GFP_KERNEL);
+ if (!dev_caps)
+ return -ENOMEM;
+
+ /* Read the most recent device capabilities from firmware. Do not use
+ * the cached values in hw->dev_caps, because the pending update flag
+ * may have changed, e.g. if an update was previously completed and
+ * the system has not yet rebooted.
+ */
+ err = ice_discover_dev_caps(hw, dev_caps);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to read device capabilities");
+ kfree(dev_caps);
+ return err;
+ }
+
+ *pending = 0;
+
+ if (dev_caps->common_cap.nvm_update_pending_nvm) {
+ dev_info(dev, "The fw.mgmt flash component has a pending update\n");
+ *pending |= ICE_AQC_NVM_ACTIV_SEL_NVM;
+ }
+
+ if (dev_caps->common_cap.nvm_update_pending_orom) {
+ dev_info(dev, "The fw.undi flash component has a pending update\n");
+ *pending |= ICE_AQC_NVM_ACTIV_SEL_OROM;
+ }
+
+ if (dev_caps->common_cap.nvm_update_pending_netlist) {
+ dev_info(dev, "The fw.netlist flash component has a pending update\n");
+ *pending |= ICE_AQC_NVM_ACTIV_SEL_NETLIST;
+ }
+
+ kfree(dev_caps);
+
+ return 0;
+}
+
+/**
+ * ice_cancel_pending_update - Cancel any pending update for a component
+ * @pf: the PF driver structure
+ * @component: if not NULL, the name of the component being updated
+ * @extack: Netlink extended ACK structure
+ *
+ * Cancel any pending update for the specified component. If component is
+ * NULL, all device updates will be canceled.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_cancel_pending_update(struct ice_pf *pf, const char *component,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = priv_to_devlink(pf);
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ u8 pending;
+ int err;
+
+ err = ice_get_pending_updates(pf, &pending, extack);
+ if (err)
+ return err;
+
+ /* If the flash_update request is for a specific component, ignore all
+ * of the other components.
+ */
+ if (component) {
+ if (strcmp(component, "fw.mgmt") == 0)
+ pending &= ICE_AQC_NVM_ACTIV_SEL_NVM;
+ else if (strcmp(component, "fw.undi") == 0)
+ pending &= ICE_AQC_NVM_ACTIV_SEL_OROM;
+ else if (strcmp(component, "fw.netlist") == 0)
+ pending &= ICE_AQC_NVM_ACTIV_SEL_NETLIST;
+ else
+ WARN(1, "Unexpected flash component %s", component);
+ }
+
+ /* There is no previous pending update, so this request may continue */
+ if (!pending)
+ return 0;
+
+ /* In order to allow overwriting a previous pending update, notify
+ * firmware to cancel that update by issuing the appropriate command.
+ */
+ devlink_flash_update_status_notify(devlink,
+ "Canceling previous pending update",
+ component, 0, 0);
+
+ err = ice_acquire_nvm(hw, ICE_RES_WRITE);
+ if (err) {
+ dev_err(dev, "Failed to acquire device flash lock, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ NL_SET_ERR_MSG_MOD(extack, "Failed to acquire device flash lock");
+ return err;
+ }
+
+ pending |= ICE_AQC_NVM_REVERT_LAST_ACTIV;
+ err = ice_switch_flash_banks(pf, pending, NULL, extack);
+
+ ice_release_nvm(hw);
+
+ /* Since we've canceled the pending update, we no longer know if EMP
+ * reset is restricted.
+ */
+ pf->fw_emp_reset_disabled = false;
+
+ return err;
+}
+
+/**
+ * ice_devlink_flash_update - Write a firmware image to the device
+ * @devlink: pointer to devlink associated with the device to update
+ * @params: devlink flash update parameters
+ * @extack: netlink extended ACK structure
+ *
+ * Parse the data for a given firmware file, verifying that it is a valid PLDM
+ * formatted image that matches this device.
+ *
+ * Extract the device record Package Data and Component Tables and send them
+ * to the firmware. Extract and write the flash data for each of the three
+ * main flash components, "fw.mgmt", "fw.undi", and "fw.netlist". Notify
+ * firmware once the data is written to the inactive banks.
+ *
+ * Returns: zero on success or a negative error code on failure.
+ */
+int ice_devlink_flash_update(struct devlink *devlink,
+ struct devlink_flash_update_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_pf *pf = devlink_priv(devlink);
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ struct ice_fwu_priv priv;
+ u8 preservation;
+ int err;
+
+ if (!params->overwrite_mask) {
+ /* preserve all settings and identifiers */
+ preservation = ICE_AQC_NVM_PRESERVE_ALL;
+ } else if (params->overwrite_mask == DEVLINK_FLASH_OVERWRITE_SETTINGS) {
+ /* overwrite settings, but preserve the vital device identifiers */
+ preservation = ICE_AQC_NVM_PRESERVE_SELECTED;
+ } else if (params->overwrite_mask == (DEVLINK_FLASH_OVERWRITE_SETTINGS |
+ DEVLINK_FLASH_OVERWRITE_IDENTIFIERS)) {
+ /* overwrite both settings and identifiers, preserve nothing */
+ preservation = ICE_AQC_NVM_NO_PRESERVATION;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Requested overwrite mask is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!hw->dev_caps.common_cap.nvm_unified_update) {
+ NL_SET_ERR_MSG_MOD(extack, "Current firmware does not support unified update");
+ return -EOPNOTSUPP;
+ }
+
+ memset(&priv, 0, sizeof(priv));
+
+ /* the E822 device needs slightly different ops */
+ if (hw->mac_type == ICE_MAC_GENERIC)
+ priv.context.ops = &ice_fwu_ops_e822;
+ else
+ priv.context.ops = &ice_fwu_ops_e810;
+ priv.context.dev = dev;
+ priv.extack = extack;
+ priv.pf = pf;
+ priv.activate_flags = preservation;
+
+ devlink_flash_update_status_notify(devlink, "Preparing to flash", NULL, 0, 0);
+
+ err = ice_cancel_pending_update(pf, NULL, extack);
+ if (err)
+ return err;
+
+ err = ice_acquire_nvm(hw, ICE_RES_WRITE);
+ if (err) {
+ dev_err(dev, "Failed to acquire device flash lock, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ NL_SET_ERR_MSG_MOD(extack, "Failed to acquire device flash lock");
+ return err;
+ }
+
+ err = pldmfw_flash_image(&priv.context, params->fw);
+ if (err == -ENOENT) {
+ dev_err(dev, "Firmware image has no record matching this device\n");
+ NL_SET_ERR_MSG_MOD(extack, "Firmware image has no record matching this device");
+ } else if (err) {
+ /* Do not set a generic extended ACK message here. A more
+ * specific message may already have been set by one of our
+ * ops.
+ */
+ dev_err(dev, "Failed to flash PLDM image, err %d", err);
+ }
+
+ ice_release_nvm(hw);
+
+ return err;
+}
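+/* For reference: this handler services the devlink flash interface, e.g.
+ * (PCI address and file name are placeholders):
+ *
+ * $ devlink dev flash pci/0000:af:00.0 file ice/firmware.pldm
+ *
+ * Adding "overwrite settings" or "overwrite settings identifiers" selects
+ * the ICE_AQC_NVM_PRESERVE_SELECTED or ICE_AQC_NVM_NO_PRESERVATION modes
+ * handled above.
+ */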
diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.h b/drivers/net/ethernet/intel/ice/ice_fw_update.h
new file mode 100644
index 000000000..750574885
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fw_update.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2019, Intel Corporation. */
+
+#ifndef _ICE_FW_UPDATE_H_
+#define _ICE_FW_UPDATE_H_
+
+int ice_devlink_flash_update(struct devlink *devlink,
+ struct devlink_flash_update_params *params,
+ struct netlink_ext_ack *extack);
+int ice_get_pending_updates(struct ice_pf *pf, u8 *pending,
+ struct netlink_ext_ack *extack);
+
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c
new file mode 100644
index 000000000..43e199b5b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.c
@@ -0,0 +1,572 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021-2022, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include <linux/tty_driver.h>
+
+/**
+ * ice_gnss_do_write - Write data to internal GNSS
+ * @pf: board private structure
+ * @buf: command buffer
+ * @size: command buffer size
+ *
+ * Write UBX command data to the GNSS receiver
+ */
+static unsigned int
+ice_gnss_do_write(struct ice_pf *pf, unsigned char *buf, unsigned int size)
+{
+ struct ice_aqc_link_topo_addr link_topo;
+ struct ice_hw *hw = &pf->hw;
+ unsigned int offset = 0;
+ int err = 0;
+
+ memset(&link_topo, 0, sizeof(struct ice_aqc_link_topo_addr));
+ link_topo.topo_params.index = ICE_E810T_GNSS_I2C_BUS;
+ link_topo.topo_params.node_type_ctx |=
+ FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M,
+ ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE);
+
+ /* It's not possible to write a single byte to u-blox.
+ * Write all bytes in a loop until there are 6 or fewer bytes left. If
+ * there are exactly 6 bytes left, the last write would be only a byte.
+ * In this case, do 4+2 bytes writes instead of 5+1. Otherwise, do the
+ * last 2 to 5 bytes write.
+ */
+ while (size - offset > ICE_GNSS_UBX_WRITE_BYTES + 1) {
+ err = ice_aq_write_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR,
+ cpu_to_le16(buf[offset]),
+ ICE_MAX_I2C_WRITE_BYTES,
+ &buf[offset + 1], NULL);
+ if (err)
+ goto err_out;
+
+ offset += ICE_GNSS_UBX_WRITE_BYTES;
+ }
+
+ /* Avoid a single-byte final write: write 4 bytes now instead of 5. */
+ if (size - offset == ICE_GNSS_UBX_WRITE_BYTES + 1) {
+ err = ice_aq_write_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR,
+ cpu_to_le16(buf[offset]),
+ ICE_MAX_I2C_WRITE_BYTES - 1,
+ &buf[offset + 1], NULL);
+ if (err)
+ goto err_out;
+
+ offset += ICE_GNSS_UBX_WRITE_BYTES - 1;
+ }
+
+ /* Do the last write, 2 to 5 bytes. */
+ err = ice_aq_write_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR,
+ cpu_to_le16(buf[offset]), size - offset - 1,
+ &buf[offset + 1], NULL);
+ if (err)
+ goto err_out;
+
+ return size;
+
+err_out:
+ dev_err(ice_pf_to_dev(pf), "GNSS failed to write, offset=%u, size=%u, err=%d\n",
+ offset, size, err);
+
+ return offset;
+}
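+/* Worked example of the write splitting above, with ICE_GNSS_UBX_WRITE_BYTES
+ * equal to 5 (1 address byte plus 4 data bytes): for an 11 byte buffer, the
+ * loop sends 5 bytes (6 left), the "exactly 6 left" branch sends 4 bytes
+ * (2 left), and the final write sends the last 2 bytes, so no write ever
+ * degenerates to a single byte.
+ */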
+
+/**
+ * ice_gnss_write_pending - Write all pending data to internal GNSS
+ * @work: GNSS write work structure
+ */
+static void ice_gnss_write_pending(struct kthread_work *work)
+{
+ struct gnss_serial *gnss = container_of(work, struct gnss_serial,
+ write_work);
+ struct ice_pf *pf = gnss->back;
+
+ if (!list_empty(&gnss->queue)) {
+ struct gnss_write_buf *write_buf = NULL;
+ unsigned int bytes;
+
+ write_buf = list_first_entry(&gnss->queue,
+ struct gnss_write_buf, queue);
+
+ bytes = ice_gnss_do_write(pf, write_buf->buf, write_buf->size);
+ dev_dbg(ice_pf_to_dev(pf), "%u bytes written to GNSS\n", bytes);
+
+ list_del(&write_buf->queue);
+ kfree(write_buf->buf);
+ kfree(write_buf);
+ }
+}
+
+/**
+ * ice_gnss_read - Read data from internal GNSS module
+ * @work: GNSS read work structure
+ *
+ * Read the data from the internal GNSS receiver and push it to the TTY
+ * layer, then requeue itself to poll again.
+ */
+static void ice_gnss_read(struct kthread_work *work)
+{
+ struct gnss_serial *gnss = container_of(work, struct gnss_serial,
+ read_work.work);
+ struct ice_aqc_link_topo_addr link_topo;
+ unsigned int i, bytes_read, data_len;
+ struct tty_port *port;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ __be16 data_len_b;
+ char *buf = NULL;
+ u8 i2c_params;
+ int err = 0;
+
+ pf = gnss->back;
+ if (!pf || !gnss->tty || !gnss->tty->port) {
+ err = -EFAULT;
+ goto exit;
+ }
+
+ hw = &pf->hw;
+ port = gnss->tty->port;
+
+ buf = (char *)get_zeroed_page(GFP_KERNEL);
+ if (!buf) {
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ memset(&link_topo, 0, sizeof(struct ice_aqc_link_topo_addr));
+ link_topo.topo_params.index = ICE_E810T_GNSS_I2C_BUS;
+ link_topo.topo_params.node_type_ctx |=
+ FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M,
+ ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE);
+
+ i2c_params = ICE_GNSS_UBX_DATA_LEN_WIDTH |
+ ICE_AQC_I2C_USE_REPEATED_START;
+
+ /* Read the data length in a loop; when it's non-zero, the data is ready */
+ for (i = 0; i < ICE_MAX_UBX_READ_TRIES; i++) {
+ err = ice_aq_read_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR,
+ cpu_to_le16(ICE_GNSS_UBX_DATA_LEN_H),
+ i2c_params, (u8 *)&data_len_b, NULL);
+ if (err)
+ goto exit_buf;
+
+ data_len = be16_to_cpu(data_len_b);
+ if (data_len != 0 && data_len != U16_MAX)
+ break;
+
+ mdelay(10);
+ }
+
+ data_len = min_t(typeof(data_len), data_len, PAGE_SIZE);
+ data_len = tty_buffer_request_room(port, data_len);
+ if (!data_len) {
+ err = -ENOMEM;
+ goto exit_buf;
+ }
+
+ /* Read received data */
+ for (i = 0; i < data_len; i += bytes_read) {
+ unsigned int bytes_left = data_len - i;
+
+ bytes_read = min_t(typeof(bytes_left), bytes_left,
+ ICE_MAX_I2C_DATA_SIZE);
+
+ err = ice_aq_read_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR,
+ cpu_to_le16(ICE_GNSS_UBX_EMPTY_DATA),
+ bytes_read, &buf[i], NULL);
+ if (err)
+ goto exit_buf;
+ }
+
+ /* Send the data to the tty layer for users to read. This doesn't
+ * actually push the data through unless tty->low_latency is set.
+ */
+ tty_insert_flip_string(port, buf, i);
+ tty_flip_buffer_push(port);
+
+exit_buf:
+ free_page((unsigned long)buf);
+ kthread_queue_delayed_work(gnss->kworker, &gnss->read_work,
+ ICE_GNSS_TIMER_DELAY_TIME);
+exit:
+ if (err)
+ dev_dbg(ice_pf_to_dev(pf), "GNSS failed to read err=%d\n", err);
+}
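+/* Polling cadence sketch: ICE_GNSS_TIMER_DELAY_TIME is HZ / 10, so the read
+ * work requeues itself roughly every 100 ms independent of the kernel tick
+ * rate (25 jiffies at HZ=250, 100 jiffies at HZ=1000).
+ */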
+
+/**
+ * ice_gnss_struct_init - Initialize GNSS structure for the TTY
+ * @pf: Board private structure
+ * @index: TTY device index
+ */
+static struct gnss_serial *ice_gnss_struct_init(struct ice_pf *pf, int index)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct kthread_worker *kworker;
+ struct gnss_serial *gnss;
+
+ gnss = kzalloc(sizeof(*gnss), GFP_KERNEL);
+ if (!gnss)
+ return NULL;
+
+ mutex_init(&gnss->gnss_mutex);
+ gnss->open_count = 0;
+ gnss->back = pf;
+ pf->gnss_serial[index] = gnss;
+
+ kthread_init_delayed_work(&gnss->read_work, ice_gnss_read);
+ INIT_LIST_HEAD(&gnss->queue);
+ kthread_init_work(&gnss->write_work, ice_gnss_write_pending);
+ /* Allocate a kworker for handling work required for the GNSS TTY
+ * writes.
+ */
+ kworker = kthread_create_worker(0, "ice-gnss-%s", dev_name(dev));
+ if (IS_ERR(kworker)) {
+ kfree(gnss);
+ return NULL;
+ }
+
+ gnss->kworker = kworker;
+
+ return gnss;
+}
+
+/**
+ * ice_gnss_tty_open - Initialize GNSS structures on TTY device open
+ * @tty: pointer to the tty_struct
+ * @filp: pointer to the file
+ *
+ * This routine is mandatory. If this routine is not filled in, the attempted
+ * open will fail with ENODEV.
+ */
+static int ice_gnss_tty_open(struct tty_struct *tty, struct file *filp)
+{
+ struct gnss_serial *gnss;
+ struct ice_pf *pf;
+
+ pf = (struct ice_pf *)tty->driver->driver_state;
+ if (!pf)
+ return -EFAULT;
+
+ /* Clear the pointer in case something fails */
+ tty->driver_data = NULL;
+
+ /* Get the serial object associated with this tty pointer */
+ gnss = pf->gnss_serial[tty->index];
+ if (!gnss) {
+ /* Initialize GNSS struct on the first device open */
+ gnss = ice_gnss_struct_init(pf, tty->index);
+ if (!gnss)
+ return -ENOMEM;
+ }
+
+ mutex_lock(&gnss->gnss_mutex);
+
+ /* Save our structure within the tty structure */
+ tty->driver_data = gnss;
+ gnss->tty = tty;
+ gnss->open_count++;
+ kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, 0);
+
+ mutex_unlock(&gnss->gnss_mutex);
+
+ return 0;
+}
+
+/**
+ * ice_gnss_tty_close - Cleanup GNSS structures on tty device close
+ * @tty: pointer to the tty_struct
+ * @filp: pointer to the file
+ */
+static void ice_gnss_tty_close(struct tty_struct *tty, struct file *filp)
+{
+ struct gnss_serial *gnss = tty->driver_data;
+ struct ice_pf *pf;
+
+ if (!gnss)
+ return;
+
+ pf = (struct ice_pf *)tty->driver->driver_state;
+ if (!pf)
+ return;
+
+ mutex_lock(&gnss->gnss_mutex);
+
+ if (!gnss->open_count) {
+ /* Port was never opened */
+ dev_err(ice_pf_to_dev(pf), "GNSS port not opened\n");
+ goto exit;
+ }
+
+ gnss->open_count--;
+ if (gnss->open_count <= 0) {
+ /* Port is in shutdown state */
+ kthread_cancel_delayed_work_sync(&gnss->read_work);
+ }
+exit:
+ mutex_unlock(&gnss->gnss_mutex);
+}
+
+/**
+ * ice_gnss_tty_write - Write GNSS data
+ * @tty: pointer to the tty_struct
+ * @buf: pointer to the user data
+ * @count: the number of characters queued to be sent to the HW
+ *
+ * The write function is called by the user when there is data to be sent
+ * to the hardware. First the tty core receives the call, and then it passes the
+ * data on to the tty driver's write function. The tty core also tells the tty
+ * driver the size of the data being sent.
+ * If any errors happen during the write call, a negative error value should be
+ * returned instead of the number of characters queued to be written.
+ */
+static int
+ice_gnss_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
+{
+ struct gnss_write_buf *write_buf;
+ struct gnss_serial *gnss;
+ unsigned char *cmd_buf;
+ struct ice_pf *pf;
+ int err = count;
+
+ /* We cannot write a single byte using our I2C implementation. */
+ if (count <= 1 || count > ICE_GNSS_TTY_WRITE_BUF)
+ return -EINVAL;
+
+ gnss = tty->driver_data;
+ if (!gnss)
+ return -EFAULT;
+
+ pf = (struct ice_pf *)tty->driver->driver_state;
+ if (!pf)
+ return -EFAULT;
+
+ /* Only allow writes on TTY 0 */
+ if (gnss != pf->gnss_serial[0])
+ return -EIO;
+
+ mutex_lock(&gnss->gnss_mutex);
+
+ if (!gnss->open_count) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ cmd_buf = kcalloc(count, sizeof(*buf), GFP_KERNEL);
+ if (!cmd_buf) {
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ memcpy(cmd_buf, buf, count);
+
+ /* Send the data out to a hardware port */
+ write_buf = kzalloc(sizeof(*write_buf), GFP_KERNEL);
+ if (!write_buf) {
+ kfree(cmd_buf);
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ write_buf->buf = cmd_buf;
+ write_buf->size = count;
+ INIT_LIST_HEAD(&write_buf->queue);
+ list_add_tail(&write_buf->queue, &gnss->queue);
+ kthread_queue_work(gnss->kworker, &gnss->write_work);
+exit:
+ mutex_unlock(&gnss->gnss_mutex);
+ return err;
+}
+
+/**
+ * ice_gnss_tty_write_room - Returns the number of characters that can be written.
+ * @tty: pointer to the tty_struct
+ *
+ * This routine returns the number of characters the tty driver will accept
+ * for queuing to be written, or 0 if either the TTY is not open or the
+ * user tries to write to a TTY other than the first.
+ */
+static unsigned int ice_gnss_tty_write_room(struct tty_struct *tty)
+{
+ struct gnss_serial *gnss = tty->driver_data;
+
+ /* Only allow writes on TTY 0 */
+ if (!gnss || gnss != gnss->back->gnss_serial[0])
+ return 0;
+
+ mutex_lock(&gnss->gnss_mutex);
+
+ if (!gnss->open_count) {
+ mutex_unlock(&gnss->gnss_mutex);
+ return 0;
+ }
+
+ mutex_unlock(&gnss->gnss_mutex);
+ return ICE_GNSS_TTY_WRITE_BUF;
+}
+
+static const struct tty_operations tty_gps_ops = {
+ .open = ice_gnss_tty_open,
+ .close = ice_gnss_tty_close,
+ .write = ice_gnss_tty_write,
+ .write_room = ice_gnss_tty_write_room,
+};
+
+/**
+ * ice_gnss_create_tty_driver - Create a TTY driver for GNSS
+ * @pf: Board private structure
+ */
+static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ const int ICE_TTYDRV_NAME_MAX = 14;
+ struct tty_driver *tty_driver;
+ char *ttydrv_name;
+ unsigned int i;
+ int err;
+
+ tty_driver = tty_alloc_driver(ICE_GNSS_TTY_MINOR_DEVICES,
+ TTY_DRIVER_REAL_RAW);
+ if (IS_ERR(tty_driver)) {
+ dev_err(dev, "Failed to allocate memory for GNSS TTY\n");
+ return NULL;
+ }
+
+ ttydrv_name = kzalloc(ICE_TTYDRV_NAME_MAX, GFP_KERNEL);
+ if (!ttydrv_name) {
+ tty_driver_kref_put(tty_driver);
+ return NULL;
+ }
+
+ snprintf(ttydrv_name, ICE_TTYDRV_NAME_MAX, "ttyGNSS_%02x%02x_",
+ (u8)pf->pdev->bus->number, (u8)PCI_SLOT(pf->pdev->devfn));
+
+ /* Initialize the tty driver */
+ tty_driver->owner = THIS_MODULE;
+ tty_driver->driver_name = dev_driver_string(dev);
+ tty_driver->name = (const char *)ttydrv_name;
+ tty_driver->type = TTY_DRIVER_TYPE_SERIAL;
+ tty_driver->subtype = SERIAL_TYPE_NORMAL;
+ tty_driver->init_termios = tty_std_termios;
+ tty_driver->init_termios.c_iflag &= ~INLCR;
+ tty_driver->init_termios.c_iflag |= IGNCR;
+ tty_driver->init_termios.c_oflag &= ~OPOST;
+ tty_driver->init_termios.c_lflag &= ~ICANON;
+ tty_driver->init_termios.c_cflag &= ~(CSIZE | CBAUD | CBAUDEX);
+ /* baud rate 9600 */
+ tty_termios_encode_baud_rate(&tty_driver->init_termios, 9600, 9600);
+ tty_driver->driver_state = pf;
+ tty_set_operations(tty_driver, &tty_gps_ops);
+
+ for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) {
+ pf->gnss_tty_port[i] = kzalloc(sizeof(*pf->gnss_tty_port[i]),
+ GFP_KERNEL);
+ if (!pf->gnss_tty_port[i])
+ goto err_out;
+
+ pf->gnss_serial[i] = NULL;
+
+ tty_port_init(pf->gnss_tty_port[i]);
+ tty_port_link_device(pf->gnss_tty_port[i], tty_driver, i);
+ }
+
+ err = tty_register_driver(tty_driver);
+ if (err) {
+ dev_err(dev, "Failed to register TTY driver err=%d\n", err);
+ goto err_out;
+ }
+
+ for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++)
+ dev_info(dev, "%s%d registered\n", ttydrv_name, i);
+
+ return tty_driver;
+
+err_out:
+ while (i--) {
+ tty_port_destroy(pf->gnss_tty_port[i]);
+ kfree(pf->gnss_tty_port[i]);
+ }
+ kfree(ttydrv_name);
+ tty_driver_kref_put(pf->ice_gnss_tty_driver);
+
+ return NULL;
+}
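+/* For reference: with the naming scheme above, a device on PCI bus 0xaf,
+ * slot 0 exposes /dev/ttyGNSS_af00_0 (read-write) and /dev/ttyGNSS_af00_1
+ * (read-only), and NMEA output can be inspected with, for example,
+ * "cat /dev/ttyGNSS_af00_0" (the addresses here are placeholders).
+ */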
+
+/**
+ * ice_gnss_init - Initialize GNSS TTY support
+ * @pf: Board private structure
+ */
+void ice_gnss_init(struct ice_pf *pf)
+{
+ struct tty_driver *tty_driver;
+
+ tty_driver = ice_gnss_create_tty_driver(pf);
+ if (!tty_driver)
+ return;
+
+ pf->ice_gnss_tty_driver = tty_driver;
+
+ set_bit(ICE_FLAG_GNSS, pf->flags);
+ dev_info(ice_pf_to_dev(pf), "GNSS TTY init successful\n");
+}
+
+/**
+ * ice_gnss_exit - Disable GNSS TTY support
+ * @pf: Board private structure
+ */
+void ice_gnss_exit(struct ice_pf *pf)
+{
+ unsigned int i;
+
+ if (!test_bit(ICE_FLAG_GNSS, pf->flags) || !pf->ice_gnss_tty_driver)
+ return;
+
+ for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) {
+ if (pf->gnss_tty_port[i]) {
+ tty_port_destroy(pf->gnss_tty_port[i]);
+ kfree(pf->gnss_tty_port[i]);
+ }
+
+ if (pf->gnss_serial[i]) {
+ struct gnss_serial *gnss = pf->gnss_serial[i];
+
+ kthread_cancel_work_sync(&gnss->write_work);
+ kthread_cancel_delayed_work_sync(&gnss->read_work);
+ kfree(gnss);
+ pf->gnss_serial[i] = NULL;
+ }
+ }
+
+ tty_unregister_driver(pf->ice_gnss_tty_driver);
+ kfree(pf->ice_gnss_tty_driver->name);
+ tty_driver_kref_put(pf->ice_gnss_tty_driver);
+ pf->ice_gnss_tty_driver = NULL;
+}
+
+/**
+ * ice_gnss_is_gps_present - Check if GPS HW is present
+ * @hw: pointer to HW struct
+ */
+bool ice_gnss_is_gps_present(struct ice_hw *hw)
+{
+ if (!hw->func_caps.ts_func_info.src_tmr_owned)
+ return false;
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+ if (ice_is_e810t(hw)) {
+ int err;
+ u8 data;
+
+ err = ice_read_pca9575_reg_e810t(hw, ICE_PCA9575_P0_IN, &data);
+ if (err || !!(data & ICE_E810T_P0_GNSS_PRSNT_N))
+ return false;
+ } else {
+ return false;
+ }
+#else
+ if (!ice_is_e810t(hw))
+ return false;
+#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+
+ return true;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h
new file mode 100644
index 000000000..f454dd1d9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2022, Intel Corporation. */
+
+#ifndef _ICE_GNSS_H_
+#define _ICE_GNSS_H_
+
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+
+#define ICE_E810T_GNSS_I2C_BUS 0x2
+#define ICE_GNSS_TIMER_DELAY_TIME (HZ / 10) /* 0.1 second per message */
+/* Create 2 minor devices, both using the same GNSS module. First one is RW,
+ * second one RO.
+ */
+#define ICE_GNSS_TTY_MINOR_DEVICES 2
+#define ICE_GNSS_TTY_WRITE_BUF 250
+#define ICE_MAX_I2C_DATA_SIZE FIELD_MAX(ICE_AQC_I2C_DATA_SIZE_M)
+#define ICE_MAX_I2C_WRITE_BYTES 4
+
+/* u-blox ZED-F9T specific definitions */
+#define ICE_GNSS_UBX_I2C_BUS_ADDR 0x42
+/* Data length register is big endian */
+#define ICE_GNSS_UBX_DATA_LEN_H 0xFD
+#define ICE_GNSS_UBX_DATA_LEN_WIDTH 2
+#define ICE_GNSS_UBX_EMPTY_DATA 0xFF
+/* For u-blox, writes are performed without a register address, so the first
+ * byte to write is passed as the I2C addr parameter.
+ */
+#define ICE_GNSS_UBX_WRITE_BYTES (ICE_MAX_I2C_WRITE_BYTES + 1)
+#define ICE_MAX_UBX_READ_TRIES 255
+#define ICE_MAX_UBX_ACK_READ_TRIES 4095
+
+struct gnss_write_buf {
+ struct list_head queue;
+ unsigned int size;
+ unsigned char *buf;
+};
+
+/**
+ * struct gnss_serial - data used to initialize GNSS TTY port
+ * @back: back pointer to PF
+ * @tty: pointer to the tty for this device
+ * @open_count: number of times this port has been opened
+ * @gnss_mutex: mutex used to protect GNSS serial operations
+ * @kworker: kthread worker for handling periodic work
+ * @read_work: delayed kthread work for handling GNSS reads
+ * @write_work: kthread work for handling GNSS writes
+ * @queue: write buffers queue
+ */
+struct gnss_serial {
+ struct ice_pf *back;
+ struct tty_struct *tty;
+ int open_count;
+ struct mutex gnss_mutex; /* protects GNSS serial structure */
+ struct kthread_worker *kworker;
+ struct kthread_delayed_work read_work;
+ struct kthread_work write_work;
+ struct list_head queue;
+};
+
+#if IS_ENABLED(CONFIG_TTY)
+void ice_gnss_init(struct ice_pf *pf);
+void ice_gnss_exit(struct ice_pf *pf);
+bool ice_gnss_is_gps_present(struct ice_hw *hw);
+#else
+static inline void ice_gnss_init(struct ice_pf *pf) { }
+static inline void ice_gnss_exit(struct ice_pf *pf) { }
+static inline bool ice_gnss_is_gps_present(struct ice_hw *hw)
+{
+ return false;
+}
+#endif /* IS_ENABLED(CONFIG_TTY) */
+#endif /* _ICE_GNSS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
new file mode 100644
index 000000000..d16738a3d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -0,0 +1,502 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* Machine-generated file */
+
+#ifndef _ICE_HW_AUTOGEN_H_
+#define _ICE_HW_AUTOGEN_H_
+
+#define QTX_COMM_DBELL(_DBQM) (0x002C0000 + ((_DBQM) * 4))
+#define QTX_COMM_HEAD(_DBQM) (0x000E0000 + ((_DBQM) * 4))
+#define QTX_COMM_HEAD_HEAD_S 0
+#define QTX_COMM_HEAD_HEAD_M ICE_M(0x1FFF, 0)
+#define PF_FW_ARQBAH 0x00080180
+#define PF_FW_ARQBAL 0x00080080
+#define PF_FW_ARQH 0x00080380
+#define PF_FW_ARQH_ARQH_M ICE_M(0x3FF, 0)
+#define PF_FW_ARQLEN 0x00080280
+#define PF_FW_ARQLEN_ARQLEN_M ICE_M(0x3FF, 0)
+#define PF_FW_ARQLEN_ARQVFE_M BIT(28)
+#define PF_FW_ARQLEN_ARQOVFL_M BIT(29)
+#define PF_FW_ARQLEN_ARQCRIT_M BIT(30)
+#define PF_FW_ARQLEN_ARQENABLE_M BIT(31)
+#define PF_FW_ARQT 0x00080480
+#define PF_FW_ATQBAH 0x00080100
+#define PF_FW_ATQBAL 0x00080000
+#define PF_FW_ATQH 0x00080300
+#define PF_FW_ATQH_ATQH_M ICE_M(0x3FF, 0)
+#define PF_FW_ATQLEN 0x00080200
+#define PF_FW_ATQLEN_ATQLEN_M ICE_M(0x3FF, 0)
+#define PF_FW_ATQLEN_ATQVFE_M BIT(28)
+#define PF_FW_ATQLEN_ATQOVFL_M BIT(29)
+#define PF_FW_ATQLEN_ATQCRIT_M BIT(30)
+#define VF_MBX_ARQLEN(_VF) (0x0022BC00 + ((_VF) * 4))
+#define VF_MBX_ATQLEN(_VF) (0x0022A800 + ((_VF) * 4))
+#define PF_FW_ATQLEN_ATQENABLE_M BIT(31)
+#define PF_FW_ATQT 0x00080400
+#define PF_MBX_ARQBAH 0x0022E400
+#define PF_MBX_ARQBAL 0x0022E380
+#define PF_MBX_ARQH 0x0022E500
+#define PF_MBX_ARQH_ARQH_M ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN 0x0022E480
+#define PF_MBX_ARQLEN_ARQLEN_M ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN_ARQCRIT_M BIT(30)
+#define PF_MBX_ARQLEN_ARQENABLE_M BIT(31)
+#define PF_MBX_ARQT 0x0022E580
+#define PF_MBX_ATQBAH 0x0022E180
+#define PF_MBX_ATQBAL 0x0022E100
+#define PF_MBX_ATQH 0x0022E280
+#define PF_MBX_ATQH_ATQH_M ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN 0x0022E200
+#define PF_MBX_ATQLEN_ATQLEN_M ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN_ATQCRIT_M BIT(30)
+#define PF_MBX_ATQLEN_ATQENABLE_M BIT(31)
+#define PF_MBX_ATQT 0x0022E300
+#define PF_SB_ARQBAH 0x0022FF00
+#define PF_SB_ARQBAH_ARQBAH_S 0
+#define PF_SB_ARQBAH_ARQBAH_M ICE_M(0xFFFFFFFF, 0)
+#define PF_SB_ARQBAL 0x0022FE80
+#define PF_SB_ARQBAL_ARQBAL_LSB_S 0
+#define PF_SB_ARQBAL_ARQBAL_LSB_M ICE_M(0x3F, 0)
+#define PF_SB_ARQBAL_ARQBAL_S 6
+#define PF_SB_ARQBAL_ARQBAL_M ICE_M(0x3FFFFFF, 6)
+#define PF_SB_ARQH 0x00230000
+#define PF_SB_ARQH_ARQH_S 0
+#define PF_SB_ARQH_ARQH_M ICE_M(0x3FF, 0)
+#define PF_SB_ARQLEN 0x0022FF80
+#define PF_SB_ARQLEN_ARQLEN_S 0
+#define PF_SB_ARQLEN_ARQLEN_M ICE_M(0x3FF, 0)
+#define PF_SB_ARQLEN_ARQVFE_S 28
+#define PF_SB_ARQLEN_ARQVFE_M BIT(28)
+#define PF_SB_ARQLEN_ARQOVFL_S 29
+#define PF_SB_ARQLEN_ARQOVFL_M BIT(29)
+#define PF_SB_ARQLEN_ARQCRIT_S 30
+#define PF_SB_ARQLEN_ARQCRIT_M BIT(30)
+#define PF_SB_ARQLEN_ARQENABLE_S 31
+#define PF_SB_ARQLEN_ARQENABLE_M BIT(31)
+#define PF_SB_ARQT 0x00230080
+#define PF_SB_ARQT_ARQT_S 0
+#define PF_SB_ARQT_ARQT_M ICE_M(0x3FF, 0)
+#define PF_SB_ATQBAH 0x0022FC80
+#define PF_SB_ATQBAH_ATQBAH_S 0
+#define PF_SB_ATQBAH_ATQBAH_M ICE_M(0xFFFFFFFF, 0)
+#define PF_SB_ATQBAL 0x0022FC00
+#define PF_SB_ATQBAL_ATQBAL_S 6
+#define PF_SB_ATQBAL_ATQBAL_M ICE_M(0x3FFFFFF, 6)
+#define PF_SB_ATQH 0x0022FD80
+#define PF_SB_ATQH_ATQH_S 0
+#define PF_SB_ATQH_ATQH_M ICE_M(0x3FF, 0)
+#define PF_SB_ATQLEN 0x0022FD00
+#define PF_SB_ATQLEN_ATQLEN_S 0
+#define PF_SB_ATQLEN_ATQLEN_M ICE_M(0x3FF, 0)
+#define PF_SB_ATQLEN_ATQVFE_S 28
+#define PF_SB_ATQLEN_ATQVFE_M BIT(28)
+#define PF_SB_ATQLEN_ATQOVFL_S 29
+#define PF_SB_ATQLEN_ATQOVFL_M BIT(29)
+#define PF_SB_ATQLEN_ATQCRIT_S 30
+#define PF_SB_ATQLEN_ATQCRIT_M BIT(30)
+#define PF_SB_ATQLEN_ATQENABLE_S 31
+#define PF_SB_ATQLEN_ATQENABLE_M BIT(31)
+#define PF_SB_ATQT 0x0022FE00
+#define PF_SB_ATQT_ATQT_S 0
+#define PF_SB_ATQT_ATQT_M ICE_M(0x3FF, 0)
+#define PF_SB_REM_DEV_CTL 0x002300F0
+#define PRTDCB_GENC 0x00083000
+#define PRTDCB_GENC_PFCLDA_S 16
+#define PRTDCB_GENC_PFCLDA_M ICE_M(0xFFFF, 16)
+#define PRTDCB_GENS 0x00083020
+#define PRTDCB_GENS_DCBX_STATUS_S 0
+#define PRTDCB_GENS_DCBX_STATUS_M ICE_M(0x7, 0)
+#define PRTDCB_TUP2TC 0x001D26C0
+#define GL_PREEXT_L2_PMASK0(_i) (0x0020F0FC + ((_i) * 4))
+#define GL_PREEXT_L2_PMASK1(_i) (0x0020F108 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S 0
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S 30
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_M ICE_M(0x3, 30)
+#define GLFLXP_RXDID_FLX_WRD_1(_i) (0x0045c900 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S 0
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_M ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S 30
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_M ICE_M(0x3, 30)
+#define GLFLXP_RXDID_FLX_WRD_2(_i) (0x0045ca00 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S 0
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_M ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S 30
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_M ICE_M(0x3, 30)
+#define GLFLXP_RXDID_FLX_WRD_3(_i) (0x0045cb00 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S 0
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_M ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S 30
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_M ICE_M(0x3, 30)
+#define QRXFLXP_CNTXT(_QRX) (0x00480000 + ((_QRX) * 4))
+#define QRXFLXP_CNTXT_RXDID_IDX_S 0
+#define QRXFLXP_CNTXT_RXDID_IDX_M ICE_M(0x3F, 0)
+#define QRXFLXP_CNTXT_RXDID_PRIO_S 8
+#define QRXFLXP_CNTXT_RXDID_PRIO_M ICE_M(0x7, 8)
+#define QRXFLXP_CNTXT_TS_M BIT(11)
+#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S 4
+#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M ICE_M(0x3, 4)
+#define GLGEN_CLKSTAT_SRC 0x000B826C
+#define GLGEN_GPIO_CTL(_i) (0x000880C8 + ((_i) * 4))
+#define GLGEN_GPIO_CTL_PIN_DIR_M BIT(4)
+#define GLGEN_GPIO_CTL_PIN_FUNC_S 8
+#define GLGEN_GPIO_CTL_PIN_FUNC_M ICE_M(0xF, 8)
+#define GLGEN_RSTAT 0x000B8188
+#define GLGEN_RSTAT_DEVSTATE_M ICE_M(0x3, 0)
+#define GLGEN_RSTCTL 0x000B8180
+#define GLGEN_RSTCTL_GRSTDEL_S 0
+#define GLGEN_RSTCTL_GRSTDEL_M ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S)
+#define GLGEN_RSTAT_RESET_TYPE_S 2
+#define GLGEN_RSTAT_RESET_TYPE_M ICE_M(0x3, 2)
+#define GLGEN_RTRIG 0x000B8190
+#define GLGEN_RTRIG_CORER_M BIT(0)
+#define GLGEN_RTRIG_GLOBR_M BIT(1)
+#define GLGEN_STAT 0x000B612C
+#define GLGEN_VFLRSTAT(_i) (0x00093A04 + ((_i) * 4))
+#define PFGEN_CTRL 0x00091000
+#define PFGEN_CTRL_PFSWR_M BIT(0)
+#define PFGEN_STATE 0x00088000
+#define PRTGEN_STATUS 0x000B8100
+#define VFGEN_RSTAT(_VF) (0x00074000 + ((_VF) * 4))
+#define VPGEN_VFRSTAT(_VF) (0x00090800 + ((_VF) * 4))
+#define VPGEN_VFRSTAT_VFRD_M BIT(0)
+#define VPGEN_VFRTRIG(_VF) (0x00090000 + ((_VF) * 4))
+#define VPGEN_VFRTRIG_VFSWR_M BIT(0)
+#define GLINT_CTL 0x0016CC54
+#define GLINT_CTL_DIS_AUTOMASK_M BIT(0)
+#define GLINT_CTL_ITR_GRAN_200_S 16
+#define GLINT_CTL_ITR_GRAN_200_M ICE_M(0xF, 16)
+#define GLINT_CTL_ITR_GRAN_100_S 20
+#define GLINT_CTL_ITR_GRAN_100_M ICE_M(0xF, 20)
+#define GLINT_CTL_ITR_GRAN_50_S 24
+#define GLINT_CTL_ITR_GRAN_50_M ICE_M(0xF, 24)
+#define GLINT_CTL_ITR_GRAN_25_S 28
+#define GLINT_CTL_ITR_GRAN_25_M ICE_M(0xF, 28)
+#define GLINT_DYN_CTL(_INT) (0x00160000 + ((_INT) * 4))
+#define GLINT_DYN_CTL_INTENA_M BIT(0)
+#define GLINT_DYN_CTL_CLEARPBA_M BIT(1)
+#define GLINT_DYN_CTL_SWINT_TRIG_M BIT(2)
+#define GLINT_DYN_CTL_ITR_INDX_S 3
+#define GLINT_DYN_CTL_ITR_INDX_M ICE_M(0x3, 3)
+#define GLINT_DYN_CTL_INTERVAL_S 5
+#define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5)
+#define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(24)
+#define GLINT_DYN_CTL_SW_ITR_INDX_S 25
+#define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25)
+#define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30)
+#define GLINT_DYN_CTL_INTENA_MSK_M BIT(31)
+#define GLINT_ITR(_i, _INT) (0x00154000 + ((_i) * 8192 + (_INT) * 4))
+#define GLINT_RATE(_INT) (0x0015A000 + ((_INT) * 4))
+#define GLINT_RATE_INTRL_ENA_M BIT(6)
+#define GLINT_VECT2FUNC(_INT) (0x00162000 + ((_INT) * 4))
+#define GLINT_VECT2FUNC_VF_NUM_S 0
+#define GLINT_VECT2FUNC_VF_NUM_M ICE_M(0xFF, 0)
+#define GLINT_VECT2FUNC_PF_NUM_S 12
+#define GLINT_VECT2FUNC_PF_NUM_M ICE_M(0x7, 12)
+#define GLINT_VECT2FUNC_IS_PF_S 16
+#define GLINT_VECT2FUNC_IS_PF_M BIT(16)
+#define PFINT_FW_CTL 0x0016C800
+#define PFINT_FW_CTL_MSIX_INDX_M ICE_M(0x7FF, 0)
+#define PFINT_FW_CTL_ITR_INDX_S 11
+#define PFINT_FW_CTL_ITR_INDX_M ICE_M(0x3, 11)
+#define PFINT_FW_CTL_CAUSE_ENA_M BIT(30)
+#define PFINT_MBX_CTL 0x0016B280
+#define PFINT_MBX_CTL_MSIX_INDX_M ICE_M(0x7FF, 0)
+#define PFINT_MBX_CTL_ITR_INDX_S 11
+#define PFINT_MBX_CTL_ITR_INDX_M ICE_M(0x3, 11)
+#define PFINT_MBX_CTL_CAUSE_ENA_M BIT(30)
+#define PFINT_OICR 0x0016CA00
+#define PFINT_OICR_TSYN_TX_M BIT(11)
+#define PFINT_OICR_TSYN_EVNT_M BIT(12)
+#define PFINT_OICR_ECC_ERR_M BIT(16)
+#define PFINT_OICR_MAL_DETECT_M BIT(19)
+#define PFINT_OICR_GRST_M BIT(20)
+#define PFINT_OICR_PCI_EXCEPTION_M BIT(21)
+#define PFINT_OICR_HMC_ERR_M BIT(26)
+#define PFINT_OICR_PE_PUSH_M BIT(27)
+#define PFINT_OICR_PE_CRITERR_M BIT(28)
+#define PFINT_OICR_VFLR_M BIT(29)
+#define PFINT_OICR_SWINT_M BIT(31)
+#define PFINT_OICR_CTL 0x0016CA80
+#define PFINT_OICR_CTL_MSIX_INDX_M ICE_M(0x7FF, 0)
+#define PFINT_OICR_CTL_ITR_INDX_S 11
+#define PFINT_OICR_CTL_ITR_INDX_M ICE_M(0x3, 11)
+#define PFINT_OICR_CTL_CAUSE_ENA_M BIT(30)
+#define PFINT_OICR_ENA 0x0016C900
+#define PFINT_SB_CTL 0x0016B600
+#define PFINT_SB_CTL_MSIX_INDX_M ICE_M(0x7FF, 0)
+#define PFINT_SB_CTL_CAUSE_ENA_M BIT(30)
+#define QINT_RQCTL(_QRX) (0x00150000 + ((_QRX) * 4))
+#define QINT_RQCTL_MSIX_INDX_S 0
+#define QINT_RQCTL_MSIX_INDX_M ICE_M(0x7FF, 0)
+#define QINT_RQCTL_ITR_INDX_S 11
+#define QINT_RQCTL_ITR_INDX_M ICE_M(0x3, 11)
+#define QINT_RQCTL_CAUSE_ENA_M BIT(30)
+#define QINT_TQCTL(_DBQM) (0x00140000 + ((_DBQM) * 4))
+#define QINT_TQCTL_MSIX_INDX_S 0
+#define QINT_TQCTL_MSIX_INDX_M ICE_M(0x7FF, 0)
+#define QINT_TQCTL_ITR_INDX_S 11
+#define QINT_TQCTL_ITR_INDX_M ICE_M(0x3, 11)
+#define QINT_TQCTL_CAUSE_ENA_M BIT(30)
+#define VPINT_ALLOC(_VF) (0x001D1000 + ((_VF) * 4))
+#define VPINT_ALLOC_FIRST_S 0
+#define VPINT_ALLOC_FIRST_M ICE_M(0x7FF, 0)
+#define VPINT_ALLOC_LAST_S 12
+#define VPINT_ALLOC_LAST_M ICE_M(0x7FF, 12)
+#define VPINT_ALLOC_VALID_M BIT(31)
+#define VPINT_ALLOC_PCI(_VF) (0x0009D000 + ((_VF) * 4))
+#define VPINT_ALLOC_PCI_FIRST_S 0
+#define VPINT_ALLOC_PCI_FIRST_M ICE_M(0x7FF, 0)
+#define VPINT_ALLOC_PCI_LAST_S 12
+#define VPINT_ALLOC_PCI_LAST_M ICE_M(0x7FF, 12)
+#define VPINT_ALLOC_PCI_VALID_M BIT(31)
+#define VPINT_MBX_CTL(_VSI) (0x0016A000 + ((_VSI) * 4))
+#define VPINT_MBX_CTL_CAUSE_ENA_M BIT(30)
+#define GLLAN_RCTL_0 0x002941F8
+#define QRX_CONTEXT(_i, _QRX) (0x00280000 + ((_i) * 8192 + (_QRX) * 4))
+#define QRX_CTRL(_QRX) (0x00120000 + ((_QRX) * 4))
+#define QRX_CTRL_MAX_INDEX 2047
+#define QRX_CTRL_QENA_REQ_S 0
+#define QRX_CTRL_QENA_REQ_M BIT(0)
+#define QRX_CTRL_QENA_STAT_S 2
+#define QRX_CTRL_QENA_STAT_M BIT(2)
+#define QRX_ITR(_QRX) (0x00292000 + ((_QRX) * 4))
+#define QRX_TAIL(_QRX) (0x00290000 + ((_QRX) * 4))
+#define QRX_TAIL_MAX_INDEX 2047
+#define QRX_TAIL_TAIL_S 0
+#define QRX_TAIL_TAIL_M ICE_M(0x1FFF, 0)
+#define VPLAN_RX_QBASE(_VF) (0x00072000 + ((_VF) * 4))
+#define VPLAN_RX_QBASE_VFFIRSTQ_S 0
+#define VPLAN_RX_QBASE_VFFIRSTQ_M ICE_M(0x7FF, 0)
+#define VPLAN_RX_QBASE_VFNUMQ_S 16
+#define VPLAN_RX_QBASE_VFNUMQ_M ICE_M(0xFF, 16)
+#define VPLAN_RXQ_MAPENA(_VF) (0x00073000 + ((_VF) * 4))
+#define VPLAN_RXQ_MAPENA_RX_ENA_M BIT(0)
+#define VPLAN_TX_QBASE(_VF) (0x001D1800 + ((_VF) * 4))
+#define VPLAN_TX_QBASE_VFFIRSTQ_S 0
+#define VPLAN_TX_QBASE_VFFIRSTQ_M ICE_M(0x3FFF, 0)
+#define VPLAN_TX_QBASE_VFNUMQ_S 16
+#define VPLAN_TX_QBASE_VFNUMQ_M ICE_M(0xFF, 16)
+#define VPLAN_TXQ_MAPENA(_VF) (0x00073800 + ((_VF) * 4))
+#define VPLAN_TXQ_MAPENA_TX_ENA_M BIT(0)
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA(_i) (0x001E36E0 + ((_i) * 32))
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX 8
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_M ICE_M(0xFFFF, 0)
+#define PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(_i) (0x001E3800 + ((_i) * 32))
+#define PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_M ICE_M(0xFFFF, 0)
+#define GL_MDCK_TX_TDPU 0x00049348
+#define GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M BIT(1)
+#define GL_MDET_RX 0x00294C00
+#define GL_MDET_RX_QNUM_S 0
+#define GL_MDET_RX_QNUM_M ICE_M(0x7FFF, 0)
+#define GL_MDET_RX_VF_NUM_S 15
+#define GL_MDET_RX_VF_NUM_M ICE_M(0xFF, 15)
+#define GL_MDET_RX_PF_NUM_S 23
+#define GL_MDET_RX_PF_NUM_M ICE_M(0x7, 23)
+#define GL_MDET_RX_MAL_TYPE_S 26
+#define GL_MDET_RX_MAL_TYPE_M ICE_M(0x1F, 26)
+#define GL_MDET_RX_VALID_M BIT(31)
+#define GL_MDET_TX_PQM 0x002D2E00
+#define GL_MDET_TX_PQM_PF_NUM_S 0
+#define GL_MDET_TX_PQM_PF_NUM_M ICE_M(0x7, 0)
+#define GL_MDET_TX_PQM_VF_NUM_S 4
+#define GL_MDET_TX_PQM_VF_NUM_M ICE_M(0xFF, 4)
+#define GL_MDET_TX_PQM_QNUM_S 12
+#define GL_MDET_TX_PQM_QNUM_M ICE_M(0x3FFF, 12)
+#define GL_MDET_TX_PQM_MAL_TYPE_S 26
+#define GL_MDET_TX_PQM_MAL_TYPE_M ICE_M(0x1F, 26)
+#define GL_MDET_TX_PQM_VALID_M BIT(31)
+#define GL_MDET_TX_TCLAN 0x000FC068
+#define GL_MDET_TX_TCLAN_QNUM_S 0
+#define GL_MDET_TX_TCLAN_QNUM_M ICE_M(0x7FFF, 0)
+#define GL_MDET_TX_TCLAN_VF_NUM_S 15
+#define GL_MDET_TX_TCLAN_VF_NUM_M ICE_M(0xFF, 15)
+#define GL_MDET_TX_TCLAN_PF_NUM_S 23
+#define GL_MDET_TX_TCLAN_PF_NUM_M ICE_M(0x7, 23)
+#define GL_MDET_TX_TCLAN_MAL_TYPE_S 26
+#define GL_MDET_TX_TCLAN_MAL_TYPE_M ICE_M(0x1F, 26)
+#define GL_MDET_TX_TCLAN_VALID_M BIT(31)
+#define PF_MDET_RX 0x00294280
+#define PF_MDET_RX_VALID_M BIT(0)
+#define PF_MDET_TX_PQM 0x002D2C80
+#define PF_MDET_TX_PQM_VALID_M BIT(0)
+#define PF_MDET_TX_TCLAN 0x000FC000
+#define PF_MDET_TX_TCLAN_VALID_M BIT(0)
+#define VP_MDET_RX(_VF) (0x00294400 + ((_VF) * 4))
+#define VP_MDET_RX_VALID_M BIT(0)
+#define VP_MDET_TX_PQM(_VF) (0x002D2000 + ((_VF) * 4))
+#define VP_MDET_TX_PQM_VALID_M BIT(0)
+#define VP_MDET_TX_TCLAN(_VF) (0x000FB800 + ((_VF) * 4))
+#define VP_MDET_TX_TCLAN_VALID_M BIT(0)
+#define VP_MDET_TX_TDPU(_VF) (0x00040000 + ((_VF) * 4))
+#define VP_MDET_TX_TDPU_VALID_M BIT(0)
+#define GLNVM_FLA 0x000B6108
+#define GLNVM_FLA_LOCKED_M BIT(6)
+#define GLNVM_GENS 0x000B6100
+#define GLNVM_GENS_SR_SIZE_S 5
+#define GLNVM_GENS_SR_SIZE_M ICE_M(0x7, 5)
+#define GLNVM_ULD 0x000B6008
+#define GLNVM_ULD_PCIER_DONE_M BIT(0)
+#define GLNVM_ULD_PCIER_DONE_1_M BIT(1)
+#define GLNVM_ULD_CORER_DONE_M BIT(3)
+#define GLNVM_ULD_GLOBR_DONE_M BIT(4)
+#define GLNVM_ULD_POR_DONE_M BIT(5)
+#define GLNVM_ULD_POR_DONE_1_M BIT(8)
+#define GLNVM_ULD_PCIER_DONE_2_M BIT(9)
+#define GLNVM_ULD_PE_DONE_M BIT(10)
+#define GLPCI_CNF2 0x000BE004
+#define GLPCI_CNF2_CACHELINE_SIZE_M BIT(1)
+#define PF_FUNC_RID 0x0009E880
+#define PF_FUNC_RID_FUNC_NUM_S 0
+#define PF_FUNC_RID_FUNC_NUM_M ICE_M(0x7, 0)
+#define PF_PCI_CIAA 0x0009E580
+#define PF_PCI_CIAA_VF_NUM_S 12
+#define PF_PCI_CIAD 0x0009E500
+#define GL_PWR_MODE_CTL 0x000B820C
+#define GL_PWR_MODE_CTL_CAR_MAX_BW_S 30
+#define GL_PWR_MODE_CTL_CAR_MAX_BW_M ICE_M(0x3, 30)
+#define GLQF_FD_CNT 0x00460018
+#define GLQF_FD_CNT_FD_BCNT_S 16
+#define GLQF_FD_CNT_FD_BCNT_M ICE_M(0x7FFF, 16)
+#define GLQF_FD_SIZE 0x00460010
+#define GLQF_FD_SIZE_FD_GSIZE_S 0
+#define GLQF_FD_SIZE_FD_GSIZE_M ICE_M(0x7FFF, 0)
+#define GLQF_FD_SIZE_FD_BSIZE_S 16
+#define GLQF_FD_SIZE_FD_BSIZE_M ICE_M(0x7FFF, 16)
+#define GLQF_FDINSET(_i, _j) (0x00412000 + ((_i) * 4 + (_j) * 512))
+#define GLQF_FDMASK(_i) (0x00410800 + ((_i) * 4))
+#define GLQF_FDMASK_MAX_INDEX 31
+#define GLQF_FDMASK_MSK_INDEX_S 0
+#define GLQF_FDMASK_MSK_INDEX_M ICE_M(0x1F, 0)
+#define GLQF_FDMASK_MASK_S 16
+#define GLQF_FDMASK_MASK_M ICE_M(0xFFFF, 16)
+#define GLQF_FDMASK_SEL(_i) (0x00410400 + ((_i) * 4))
+#define GLQF_FDSWAP(_i, _j) (0x00413000 + ((_i) * 4 + (_j) * 512))
+#define GLQF_HMASK(_i) (0x0040FC00 + ((_i) * 4))
+#define GLQF_HMASK_MAX_INDEX 31
+#define GLQF_HMASK_MSK_INDEX_S 0
+#define GLQF_HMASK_MSK_INDEX_M ICE_M(0x1F, 0)
+#define GLQF_HMASK_MASK_S 16
+#define GLQF_HMASK_MASK_M ICE_M(0xFFFF, 16)
+#define GLQF_HMASK_SEL(_i) (0x00410000 + ((_i) * 4))
+#define GLQF_HMASK_SEL_MAX_INDEX 127
+#define GLQF_HMASK_SEL_MASK_SEL_S 0
+#define PFQF_FD_ENA 0x0043A000
+#define PFQF_FD_ENA_FD_ENA_M BIT(0)
+#define PFQF_FD_SIZE 0x00460100
+#define GLDCB_RTCTQ_RXQNUM_S 0
+#define GLDCB_RTCTQ_RXQNUM_M ICE_M(0x7FF, 0)
+#define GLPRT_BPRCL(_i) (0x00381380 + ((_i) * 8))
+#define GLPRT_BPTCL(_i) (0x00381240 + ((_i) * 8))
+#define GLPRT_CRCERRS(_i) (0x00380100 + ((_i) * 8))
+#define GLPRT_GORCL(_i) (0x00380000 + ((_i) * 8))
+#define GLPRT_GOTCL(_i) (0x00380B40 + ((_i) * 8))
+#define GLPRT_ILLERRC(_i) (0x003801C0 + ((_i) * 8))
+#define GLPRT_LXOFFRXC(_i) (0x003802C0 + ((_i) * 8))
+#define GLPRT_LXOFFTXC(_i) (0x00381180 + ((_i) * 8))
+#define GLPRT_LXONRXC(_i) (0x00380280 + ((_i) * 8))
+#define GLPRT_LXONTXC(_i) (0x00381140 + ((_i) * 8))
+#define GLPRT_MLFC(_i) (0x00380040 + ((_i) * 8))
+#define GLPRT_MPRCL(_i) (0x00381340 + ((_i) * 8))
+#define GLPRT_MPTCL(_i) (0x00381200 + ((_i) * 8))
+#define GLPRT_MRFC(_i) (0x00380080 + ((_i) * 8))
+#define GLPRT_PRC1023L(_i) (0x00380A00 + ((_i) * 8))
+#define GLPRT_PRC127L(_i) (0x00380940 + ((_i) * 8))
+#define GLPRT_PRC1522L(_i) (0x00380A40 + ((_i) * 8))
+#define GLPRT_PRC255L(_i) (0x00380980 + ((_i) * 8))
+#define GLPRT_PRC511L(_i) (0x003809C0 + ((_i) * 8))
+#define GLPRT_PRC64L(_i) (0x00380900 + ((_i) * 8))
+#define GLPRT_PRC9522L(_i) (0x00380A80 + ((_i) * 8))
+#define GLPRT_PTC1023L(_i) (0x00380C80 + ((_i) * 8))
+#define GLPRT_PTC127L(_i) (0x00380BC0 + ((_i) * 8))
+#define GLPRT_PTC1522L(_i) (0x00380CC0 + ((_i) * 8))
+#define GLPRT_PTC255L(_i) (0x00380C00 + ((_i) * 8))
+#define GLPRT_PTC511L(_i) (0x00380C40 + ((_i) * 8))
+#define GLPRT_PTC64L(_i) (0x00380B80 + ((_i) * 8))
+#define GLPRT_PTC9522L(_i) (0x00380D00 + ((_i) * 8))
+#define GLPRT_PXOFFRXC(_i, _j) (0x00380500 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXOFFTXC(_i, _j) (0x00380F40 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXONRXC(_i, _j) (0x00380300 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXONTXC(_i, _j) (0x00380D40 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_RFC(_i) (0x00380AC0 + ((_i) * 8))
+#define GLPRT_RJC(_i) (0x00380B00 + ((_i) * 8))
+#define GLPRT_RLEC(_i) (0x00380140 + ((_i) * 8))
+#define GLPRT_ROC(_i) (0x00380240 + ((_i) * 8))
+#define GLPRT_RUC(_i) (0x00380200 + ((_i) * 8))
+#define GLPRT_RXON2OFFCNT(_i, _j) (0x00380700 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_TDOLD(_i) (0x00381280 + ((_i) * 8))
+#define GLPRT_UPRCL(_i) (0x00381300 + ((_i) * 8))
+#define GLPRT_UPTCL(_i) (0x003811C0 + ((_i) * 8))
+#define GLSTAT_FD_CNT0L(_i) (0x003A0000 + ((_i) * 8))
+#define GLV_BPRCL(_i) (0x003B6000 + ((_i) * 8))
+#define GLV_BPTCL(_i) (0x0030E000 + ((_i) * 8))
+#define GLV_GORCL(_i) (0x003B0000 + ((_i) * 8))
+#define GLV_GOTCL(_i) (0x00300000 + ((_i) * 8))
+#define GLV_MPRCL(_i) (0x003B4000 + ((_i) * 8))
+#define GLV_MPTCL(_i) (0x0030C000 + ((_i) * 8))
+#define GLV_RDPC(_i) (0x00294C04 + ((_i) * 4))
+#define GLV_TEPC(_VSI) (0x00312000 + ((_VSI) * 4))
+#define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8))
+#define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8))
+#define PRTRPB_RDPC 0x000AC260
+#define GLHH_ART_CTL 0x000A41D4
+#define GLHH_ART_CTL_ACTIVE_M BIT(0)
+#define GLHH_ART_TIME_H 0x000A41D8
+#define GLHH_ART_TIME_L 0x000A41DC
+#define GLTSYN_AUX_IN_0(_i) (0x000889D8 + ((_i) * 4))
+#define GLTSYN_AUX_IN_0_INT_ENA_M BIT(4)
+#define GLTSYN_AUX_OUT_0(_i) (0x00088998 + ((_i) * 4))
+#define GLTSYN_AUX_OUT_0_OUT_ENA_M BIT(0)
+#define GLTSYN_AUX_OUT_0_OUTMOD_M ICE_M(0x3, 1)
+#define GLTSYN_CLKO_0(_i) (0x000889B8 + ((_i) * 4))
+#define GLTSYN_CMD 0x00088810
+#define GLTSYN_CMD_SYNC 0x00088814
+#define GLTSYN_ENA(_i) (0x00088808 + ((_i) * 4))
+#define GLTSYN_ENA_TSYN_ENA_M BIT(0)
+#define GLTSYN_EVNT_H_0(_i) (0x00088970 + ((_i) * 4))
+#define GLTSYN_EVNT_L_0(_i) (0x00088968 + ((_i) * 4))
+#define GLTSYN_HHTIME_H(_i) (0x00088900 + ((_i) * 4))
+#define GLTSYN_HHTIME_L(_i) (0x000888F8 + ((_i) * 4))
+#define GLTSYN_INCVAL_H(_i) (0x00088920 + ((_i) * 4))
+#define GLTSYN_INCVAL_L(_i) (0x00088918 + ((_i) * 4))
+#define GLTSYN_SHADJ_H(_i) (0x00088910 + ((_i) * 4))
+#define GLTSYN_SHADJ_L(_i) (0x00088908 + ((_i) * 4))
+#define GLTSYN_SHTIME_0(_i) (0x000888E0 + ((_i) * 4))
+#define GLTSYN_SHTIME_H(_i) (0x000888F0 + ((_i) * 4))
+#define GLTSYN_SHTIME_L(_i) (0x000888E8 + ((_i) * 4))
+#define GLTSYN_STAT(_i) (0x000888C0 + ((_i) * 4))
+#define GLTSYN_STAT_EVENT0_M BIT(0)
+#define GLTSYN_STAT_EVENT1_M BIT(1)
+#define GLTSYN_STAT_EVENT2_M BIT(2)
+#define GLTSYN_SYNC_DLAY 0x00088818
+#define GLTSYN_TGT_H_0(_i) (0x00088930 + ((_i) * 4))
+#define GLTSYN_TGT_L_0(_i) (0x00088928 + ((_i) * 4))
+#define GLTSYN_TIME_H(_i) (0x000888D8 + ((_i) * 4))
+#define GLTSYN_TIME_L(_i) (0x000888D0 + ((_i) * 4))
+#define PFHH_SEM 0x000A4200 /* Reset Source: PFR */
+#define PFHH_SEM_BUSY_M BIT(0)
+#define PFTSYN_SEM 0x00088880
+#define PFTSYN_SEM_BUSY_M BIT(0)
+#define VSIQF_FD_CNT(_VSI) (0x00464000 + ((_VSI) * 4))
+#define VSIQF_FD_CNT_FD_GCNT_S 0
+#define VSIQF_FD_CNT_FD_GCNT_M ICE_M(0x3FFF, 0)
+#define VSIQF_FD_CNT_FD_BCNT_S 16
+#define VSIQF_FD_CNT_FD_BCNT_M ICE_M(0x3FFF, 16)
+#define VSIQF_FD_SIZE(_VSI) (0x00462000 + ((_VSI) * 4))
+#define VSIQF_HKEY_MAX_INDEX 12
+#define VSIQF_HLUT_MAX_INDEX 15
+#define PFPM_APM 0x000B8080
+#define PFPM_APM_APME_M BIT(0)
+#define PFPM_WUFC 0x0009DC00
+#define PFPM_WUFC_MAG_M BIT(1)
+#define PFPM_WUS 0x0009DB80
+#define PFPM_WUS_LNKC_M BIT(0)
+#define PFPM_WUS_MAG_M BIT(1)
+#define PFPM_WUS_MNG_M BIT(3)
+#define PFPM_WUS_FW_RST_WK_M BIT(31)
+#define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4))
+#define VFINT_DYN_CTLN_CLEARPBA_M BIT(1)
+
+#endif /* _ICE_HW_AUTOGEN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
new file mode 100644
index 000000000..895c32bcc
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+/* Inter-Driver Communication */
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
+
+/**
+ * ice_get_auxiliary_drv - retrieve iidc_auxiliary_drv struct
+ * @pf: pointer to PF struct
+ *
+ * This function has to be called with a device_lock on the
+ * pf->adev.dev to avoid race conditions.
+ */
+static struct iidc_auxiliary_drv *ice_get_auxiliary_drv(struct ice_pf *pf)
+{
+ struct auxiliary_device *adev;
+
+ adev = pf->adev;
+ if (!adev || !adev->dev.driver)
+ return NULL;
+
+ return container_of(adev->dev.driver, struct iidc_auxiliary_drv,
+ adrv.driver);
+}
+
+/**
+ * ice_send_event_to_aux - send event to RDMA AUX driver
+ * @pf: pointer to PF struct
+ * @event: event struct
+ */
+void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event)
+{
+ struct iidc_auxiliary_drv *iadrv;
+
+ if (WARN_ON_ONCE(!in_task()))
+ return;
+
+ mutex_lock(&pf->adev_mutex);
+ if (!pf->adev)
+ goto finish;
+
+ device_lock(&pf->adev->dev);
+ iadrv = ice_get_auxiliary_drv(pf);
+ if (iadrv && iadrv->event_handler)
+ iadrv->event_handler(pf, event);
+ device_unlock(&pf->adev->dev);
+finish:
+ mutex_unlock(&pf->adev_mutex);
+}
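+
+/* Illustrative caller sketch (not part of this file): ice_main.c notifies
+ * the aux driver about an MTU change roughly like so, given a valid
+ * struct ice_pf *pf:
+ *
+ *	struct iidc_event *event;
+ *
+ *	event = kzalloc(sizeof(*event), GFP_KERNEL);
+ *	if (event) {
+ *		set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
+ *		ice_send_event_to_aux(pf, event);
+ *		kfree(event);
+ *	}
+ */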
+
+/**
+ * ice_add_rdma_qset - Add Leaf Node for RDMA Qset
+ * @pf: PF struct
+ * @qset: Resource to be allocated
+ */
+int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset)
+{
+ u16 max_rdmaqs[ICE_MAX_TRAFFIC_CLASS];
+ struct ice_vsi *vsi;
+ struct device *dev;
+ u32 qset_teid;
+ u16 qs_handle;
+ int status;
+ int i;
+
+ if (WARN_ON(!pf || !qset))
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(pf);
+
+ if (!ice_is_rdma_ena(pf))
+ return -EINVAL;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi) {
+ dev_err(dev, "RDMA QSet invalid VSI\n");
+ return -EINVAL;
+ }
+
+ ice_for_each_traffic_class(i)
+ max_rdmaqs[i] = 0;
+
+ max_rdmaqs[qset->tc]++;
+ qs_handle = qset->qs_handle;
+
+ status = ice_cfg_vsi_rdma(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_rdmaqs);
+ if (status) {
+ dev_err(dev, "Failed VSI RDMA Qset config\n");
+ return status;
+ }
+
+ status = ice_ena_vsi_rdma_qset(vsi->port_info, vsi->idx, qset->tc,
+ &qs_handle, 1, &qset_teid);
+ if (status) {
+ dev_err(dev, "Failed VSI RDMA Qset enable\n");
+ return status;
+ }
+ vsi->qset_handle[qset->tc] = qset->qs_handle;
+ qset->teid = qset_teid;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ice_add_rdma_qset);
+
+/**
+ * ice_del_rdma_qset - Delete leaf node for RDMA Qset
+ * @pf: PF struct
+ * @qset: Resource to be freed
+ */
+int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset)
+{
+ struct ice_vsi *vsi;
+ u32 teid;
+ u16 q_id;
+
+ if (WARN_ON(!pf || !qset))
+ return -EINVAL;
+
+ vsi = ice_find_vsi(pf, qset->vport_id);
+ if (!vsi) {
+ dev_err(ice_pf_to_dev(pf), "RDMA Invalid VSI\n");
+ return -EINVAL;
+ }
+
+ q_id = qset->qs_handle;
+ teid = qset->teid;
+
+ vsi->qset_handle[qset->tc] = 0;
+
+ return ice_dis_vsi_rdma_qset(vsi->port_info, 1, &teid, &q_id);
+}
+EXPORT_SYMBOL_GPL(ice_del_rdma_qset);
+
+/**
+ * ice_rdma_request_reset - accept request from RDMA to perform a reset
+ * @pf: struct for PF
+ * @reset_type: type of reset
+ */
+int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type)
+{
+ enum ice_reset_req reset;
+
+ if (WARN_ON(!pf))
+ return -EINVAL;
+
+ switch (reset_type) {
+ case IIDC_PFR:
+ reset = ICE_RESET_PFR;
+ break;
+ case IIDC_CORER:
+ reset = ICE_RESET_CORER;
+ break;
+ case IIDC_GLOBR:
+ reset = ICE_RESET_GLOBR;
+ break;
+ default:
+ dev_err(ice_pf_to_dev(pf), "incorrect reset request\n");
+ return -EINVAL;
+ }
+
+ return ice_schedule_reset(pf, reset);
+}
+EXPORT_SYMBOL_GPL(ice_rdma_request_reset);
+
+/**
+ * ice_rdma_update_vsi_filter - update main VSI filters for RDMA
+ * @pf: pointer to struct for PF
+ * @vsi_id: VSI HW idx to update filter on
+ * @enable: true to enable RDMA filtering, false to disable it
+ */
+int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable)
+{
+ struct ice_vsi *vsi;
+ int status;
+
+ if (WARN_ON(!pf))
+ return -EINVAL;
+
+ vsi = ice_find_vsi(pf, vsi_id);
+ if (!vsi)
+ return -EINVAL;
+
+ status = ice_cfg_rdma_fltr(&pf->hw, vsi->idx, enable);
+ if (status) {
+ dev_err(ice_pf_to_dev(pf), "Failed to %sable RDMA filtering\n",
+ enable ? "en" : "dis");
+ } else {
+ if (enable)
+ vsi->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+ else
+ vsi->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+ }
+
+ return status;
+}
+EXPORT_SYMBOL_GPL(ice_rdma_update_vsi_filter);
+
+/**
+ * ice_get_qos_params - parse QoS params for RDMA consumption
+ * @pf: pointer to PF struct
+ * @qos: set of QoS values
+ */
+void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos)
+{
+ struct ice_dcbx_cfg *dcbx_cfg;
+ unsigned int i;
+ u32 up2tc;
+
+ dcbx_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+ up2tc = rd32(&pf->hw, PRTDCB_TUP2TC);
+
+ qos->num_tc = ice_dcb_get_num_tc(dcbx_cfg);
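+	/* PRTDCB_TUP2TC packs eight 3-bit UP-to-TC mappings into one
+	 * register, hence the 3-bit shift-and-mask below
+	 */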
+ for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++)
+ qos->up2tc[i] = (up2tc >> (i * 3)) & 0x7;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ qos->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i];
+
+ qos->pfc_mode = dcbx_cfg->pfc_mode;
+ if (qos->pfc_mode == IIDC_DSCP_PFC_MODE)
+ for (i = 0; i < IIDC_MAX_DSCP_MAPPING; i++)
+ qos->dscp_map[i] = dcbx_cfg->dscp_map[i];
+}
+EXPORT_SYMBOL_GPL(ice_get_qos_params);
+
+/**
+ * ice_reserve_rdma_qvector - Reserve vector resources for RDMA driver
+ * @pf: board private structure to initialize
+ */
+static int ice_reserve_rdma_qvector(struct ice_pf *pf)
+{
+ if (ice_is_rdma_ena(pf)) {
+ int index;
+
+ index = ice_get_res(pf, pf->irq_tracker, pf->num_rdma_msix,
+ ICE_RES_RDMA_VEC_ID);
+ if (index < 0)
+ return index;
+ pf->num_avail_sw_msix -= pf->num_rdma_msix;
+ pf->rdma_base_vector = (u16)index;
+ }
+ return 0;
+}
+
+/**
+ * ice_adev_release - function to be mapped to AUX dev's release op
+ * @dev: pointer to device to free
+ */
+static void ice_adev_release(struct device *dev)
+{
+ struct iidc_auxiliary_dev *iadev;
+
+ iadev = container_of(dev, struct iidc_auxiliary_dev, adev.dev);
+ kfree(iadev);
+}
+
+/**
+ * ice_plug_aux_dev - allocate and register AUX device
+ * @pf: pointer to pf struct
+ */
+int ice_plug_aux_dev(struct ice_pf *pf)
+{
+ struct iidc_auxiliary_dev *iadev;
+ struct auxiliary_device *adev;
+ int ret;
+
+ /* if this PF doesn't support a technology that requires auxiliary
+ * devices, then gracefully exit
+ */
+ if (!ice_is_rdma_ena(pf))
+ return 0;
+
+ iadev = kzalloc(sizeof(*iadev), GFP_KERNEL);
+ if (!iadev)
+ return -ENOMEM;
+
+ adev = &iadev->adev;
+ iadev->pf = pf;
+
+ adev->id = pf->aux_idx;
+ adev->dev.release = ice_adev_release;
+ adev->dev.parent = &pf->pdev->dev;
+ adev->name = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? "roce" : "iwarp";
+
+ ret = auxiliary_device_init(adev);
+ if (ret) {
+ kfree(iadev);
+ return ret;
+ }
+
+ ret = auxiliary_device_add(adev);
+ if (ret) {
+ auxiliary_device_uninit(adev);
+ return ret;
+ }
+
+ mutex_lock(&pf->adev_mutex);
+ pf->adev = adev;
+ mutex_unlock(&pf->adev_mutex);
+
+ return 0;
+}
+
+/**
+ * ice_unplug_aux_dev - unregister and free AUX device
+ * @pf: pointer to pf struct
+ */
+void ice_unplug_aux_dev(struct ice_pf *pf)
+{
+ struct auxiliary_device *adev;
+
+ mutex_lock(&pf->adev_mutex);
+ adev = pf->adev;
+ pf->adev = NULL;
+ mutex_unlock(&pf->adev_mutex);
+
+ if (adev) {
+ auxiliary_device_delete(adev);
+ auxiliary_device_uninit(adev);
+ }
+}
+
+/**
+ * ice_init_rdma - initializes PF for RDMA use
+ * @pf: ptr to ice_pf
+ */
+int ice_init_rdma(struct ice_pf *pf)
+{
+ struct device *dev = &pf->pdev->dev;
+ int ret;
+
+ /* Reserve vector resources */
+ ret = ice_reserve_rdma_qvector(pf);
+ if (ret < 0) {
+ dev_err(dev, "failed to reserve vectors for RDMA\n");
+ return ret;
+ }
+ pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2;
+ return ice_plug_aux_dev(pf);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_idc_int.h b/drivers/net/ethernet/intel/ice/ice_idc_int.h
new file mode 100644
index 000000000..4b0c86757
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_idc_int.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, Intel Corporation. */
+
+#ifndef _ICE_IDC_INT_H_
+#define _ICE_IDC_INT_H_
+
+#include <linux/net/intel/iidc.h>
+
+struct ice_pf;
+
+void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event);
+
+#endif /* _ICE_IDC_INT_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
new file mode 100644
index 000000000..ee5b36941
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+/* Link Aggregation code */
+
+#include "ice.h"
+#include "ice_lag.h"
+
+/**
+ * ice_lag_nop_handler - no-op Rx handler to disable LAG
+ * @pskb: pointer to skb pointer
+ */
+rx_handler_result_t ice_lag_nop_handler(struct sk_buff __always_unused **pskb)
+{
+ return RX_HANDLER_PASS;
+}
+
+/**
+ * ice_lag_set_primary - set PF LAG state as Primary
+ * @lag: LAG info struct
+ */
+static void ice_lag_set_primary(struct ice_lag *lag)
+{
+ struct ice_pf *pf = lag->pf;
+
+ if (!pf)
+ return;
+
+ if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_BACKUP) {
+ dev_warn(ice_pf_to_dev(pf), "%s: Attempt to be Primary, but incompatible state.\n",
+ netdev_name(lag->netdev));
+ return;
+ }
+
+ lag->role = ICE_LAG_PRIMARY;
+}
+
+/**
+ * ice_lag_set_backup - set PF LAG state to Backup
+ * @lag: LAG info struct
+ */
+static void ice_lag_set_backup(struct ice_lag *lag)
+{
+ struct ice_pf *pf = lag->pf;
+
+ if (!pf)
+ return;
+
+ if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_PRIMARY) {
+ dev_dbg(ice_pf_to_dev(pf), "%s: Attempt to be Backup, but incompatible state\n",
+ netdev_name(lag->netdev));
+ return;
+ }
+
+ lag->role = ICE_LAG_BACKUP;
+}
+
+/**
+ * ice_display_lag_info - print LAG info
+ * @lag: LAG info struct
+ */
+static void ice_display_lag_info(struct ice_lag *lag)
+{
+ const char *name, *peer, *upper, *role, *bonded, *primary;
+ struct device *dev = &lag->pf->pdev->dev;
+
+ name = lag->netdev ? netdev_name(lag->netdev) : "unset";
+ peer = lag->peer_netdev ? netdev_name(lag->peer_netdev) : "unset";
+ upper = lag->upper_netdev ? netdev_name(lag->upper_netdev) : "unset";
+ primary = lag->primary ? "TRUE" : "FALSE";
+ bonded = lag->bonded ? "BONDED" : "UNBONDED";
+
+ switch (lag->role) {
+ case ICE_LAG_NONE:
+ role = "NONE";
+ break;
+ case ICE_LAG_PRIMARY:
+ role = "PRIMARY";
+ break;
+ case ICE_LAG_BACKUP:
+ role = "BACKUP";
+ break;
+ case ICE_LAG_UNSET:
+ role = "UNSET";
+ break;
+ default:
+ role = "ERROR";
+ }
+
+ dev_dbg(dev, "%s %s, peer:%s, upper:%s, role:%s, primary:%s\n", name,
+ bonded, peer, upper, role, primary);
+}
+
+/**
+ * ice_lag_info_event - handle NETDEV_BONDING_INFO event
+ * @lag: LAG info struct
+ * @ptr: opaque data pointer
+ *
+ * ptr is to be cast to (netdev_notifier_bonding_info *)
+ */
+static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
+{
+ struct netdev_notifier_bonding_info *info;
+ struct netdev_bonding_info *bonding_info;
+ struct net_device *event_netdev;
+ const char *lag_netdev_name;
+
+ event_netdev = netdev_notifier_info_to_dev(ptr);
+ info = ptr;
+ lag_netdev_name = netdev_name(lag->netdev);
+ bonding_info = &info->bonding_info;
+
+ if (event_netdev != lag->netdev || !lag->bonded || !lag->upper_netdev)
+ return;
+
+ if (bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) {
+ netdev_dbg(lag->netdev, "Bonding event recv, but mode not active/backup\n");
+ goto lag_out;
+ }
+
+ if (strcmp(bonding_info->slave.slave_name, lag_netdev_name)) {
+ netdev_dbg(lag->netdev, "Bonding event recv, but secondary info not for us\n");
+ goto lag_out;
+ }
+
+ if (bonding_info->slave.state)
+ ice_lag_set_backup(lag);
+ else
+ ice_lag_set_primary(lag);
+
+lag_out:
+ ice_display_lag_info(lag);
+}
+
+/**
+ * ice_lag_link - handle LAG link event
+ * @lag: LAG info struct
+ * @info: info from the netdev notifier
+ */
+static void
+ice_lag_link(struct ice_lag *lag, struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *netdev_tmp, *upper = info->upper_dev;
+ struct ice_pf *pf = lag->pf;
+ int peers = 0;
+
+ if (lag->bonded)
+ dev_warn(ice_pf_to_dev(pf), "%s Already part of a bond\n",
+ netdev_name(lag->netdev));
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, netdev_tmp)
+ peers++;
+ rcu_read_unlock();
+
+ if (lag->upper_netdev != upper) {
+ dev_hold(upper);
+ lag->upper_netdev = upper;
+ }
+
+ ice_clear_sriov_cap(pf);
+ ice_clear_rdma_cap(pf);
+
+ lag->bonded = true;
+ lag->role = ICE_LAG_UNSET;
+
+	/* if this is the first element in a LAG, mark it as primary */
+ lag->primary = !!(peers == 1);
+}
+
+/**
+ * ice_lag_unlink - handle unlink event
+ * @lag: LAG info struct
+ * @info: info from netdev notification
+ */
+static void
+ice_lag_unlink(struct ice_lag *lag,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *netdev_tmp, *upper = info->upper_dev;
+ struct ice_pf *pf = lag->pf;
+ bool found = false;
+
+ if (!lag->bonded) {
+ netdev_dbg(lag->netdev, "bonding unlink event on non-LAG netdev\n");
+ return;
+ }
+
+ /* determine if we are in the new LAG config or not */
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, netdev_tmp) {
+ if (netdev_tmp == lag->netdev) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (found)
+ return;
+
+ if (lag->upper_netdev) {
+ dev_put(lag->upper_netdev);
+ lag->upper_netdev = NULL;
+ }
+
+ lag->peer_netdev = NULL;
+ ice_set_sriov_cap(pf);
+ ice_set_rdma_cap(pf);
+ lag->bonded = false;
+ lag->role = ICE_LAG_NONE;
+}
+
+/**
+ * ice_lag_unregister - handle netdev unregister events
+ * @lag: LAG info struct
+ * @netdev: netdev reporting the event
+ */
+static void ice_lag_unregister(struct ice_lag *lag, struct net_device *netdev)
+{
+ struct ice_pf *pf = lag->pf;
+
+	/* check that this event is for this netdev and that we are
+	 * currently in an aggregate
+	 */
+ if (netdev != lag->netdev || !lag->bonded)
+ return;
+
+ if (lag->upper_netdev) {
+ dev_put(lag->upper_netdev);
+ lag->upper_netdev = NULL;
+ ice_set_sriov_cap(pf);
+ ice_set_rdma_cap(pf);
+ }
+ /* perform some cleanup in case we come back */
+ lag->bonded = false;
+ lag->role = ICE_LAG_NONE;
+}
+
+/**
+ * ice_lag_changeupper_event - handle LAG changeupper event
+ * @lag: LAG info struct
+ * @ptr: opaque pointer data
+ *
+ * ptr is to be cast into netdev_notifier_changeupper_info
+ */
+static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info;
+ struct net_device *netdev;
+
+ info = ptr;
+ netdev = netdev_notifier_info_to_dev(ptr);
+
+ /* not for this netdev */
+ if (netdev != lag->netdev)
+ return;
+
+ if (!info->upper_dev) {
+ netdev_dbg(netdev, "changeupper rcvd, but no upper defined\n");
+ return;
+ }
+
+ netdev_dbg(netdev, "bonding %s\n", info->linking ? "LINK" : "UNLINK");
+
+ if (!netif_is_lag_master(info->upper_dev)) {
+ netdev_dbg(netdev, "changeupper rcvd, but not primary. bail\n");
+ return;
+ }
+
+ if (info->linking)
+ ice_lag_link(lag, info);
+ else
+ ice_lag_unlink(lag, info);
+
+ ice_display_lag_info(lag);
+}
+
+/**
+ * ice_lag_changelower_event - handle LAG changelower event
+ * @lag: LAG info struct
+ * @ptr: opaque data pointer
+ *
+ * ptr to be cast to netdev_notifier_changelowerstate_info
+ */
+static void ice_lag_changelower_event(struct ice_lag *lag, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ if (netdev != lag->netdev)
+ return;
+
+ netdev_dbg(netdev, "bonding info\n");
+
+ if (!netif_is_lag_port(netdev))
+ netdev_dbg(netdev, "CHANGELOWER rcvd, but netdev not in LAG. Bail\n");
+}
+
+/**
+ * ice_lag_event_handler - handle LAG events from netdev
+ * @notif_blk: notifier block registered by this netdev
+ * @event: event type
+ * @ptr: opaque data containing notifier event
+ */
+static int
+ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
+ void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct ice_lag *lag;
+
+ lag = container_of(notif_blk, struct ice_lag, notif_block);
+
+ if (!lag->netdev)
+ return NOTIFY_DONE;
+
+ /* Check that the netdev is in the working namespace */
+ if (!net_eq(dev_net(netdev), &init_net))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ ice_lag_changeupper_event(lag, ptr);
+ break;
+ case NETDEV_CHANGELOWERSTATE:
+ ice_lag_changelower_event(lag, ptr);
+ break;
+ case NETDEV_BONDING_INFO:
+ ice_lag_info_event(lag, ptr);
+ break;
+ case NETDEV_UNREGISTER:
+ ice_lag_unregister(lag, netdev);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * ice_register_lag_handler - register LAG handler on netdev
+ * @lag: LAG struct
+ */
+static int ice_register_lag_handler(struct ice_lag *lag)
+{
+ struct device *dev = ice_pf_to_dev(lag->pf);
+ struct notifier_block *notif_blk;
+
+ notif_blk = &lag->notif_block;
+
+ if (!notif_blk->notifier_call) {
+ notif_blk->notifier_call = ice_lag_event_handler;
+ if (register_netdevice_notifier(notif_blk)) {
+ notif_blk->notifier_call = NULL;
+ dev_err(dev, "FAIL register LAG event handler!\n");
+ return -EINVAL;
+ }
+ dev_dbg(dev, "LAG event handler registered\n");
+ }
+ return 0;
+}
+
+/**
+ * ice_unregister_lag_handler - unregister LAG handler on netdev
+ * @lag: LAG struct
+ */
+static void ice_unregister_lag_handler(struct ice_lag *lag)
+{
+ struct device *dev = ice_pf_to_dev(lag->pf);
+ struct notifier_block *notif_blk;
+
+ notif_blk = &lag->notif_block;
+ if (notif_blk->notifier_call) {
+ unregister_netdevice_notifier(notif_blk);
+ dev_dbg(dev, "LAG event handler unregistered\n");
+ }
+}
+
+/**
+ * ice_init_lag - initialize support for LAG
+ * @pf: PF struct
+ *
+ * Alloc memory for LAG structs and initialize the elements.
+ * Memory will be freed in ice_deinit_lag
+ */
+int ice_init_lag(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_lag *lag;
+ struct ice_vsi *vsi;
+ int err;
+
+ pf->lag = kzalloc(sizeof(*lag), GFP_KERNEL);
+ if (!pf->lag)
+ return -ENOMEM;
+ lag = pf->lag;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi) {
+ dev_err(dev, "couldn't get main vsi, link aggregation init fail\n");
+ err = -EIO;
+ goto lag_error;
+ }
+
+ lag->pf = pf;
+ lag->netdev = vsi->netdev;
+ lag->role = ICE_LAG_NONE;
+ lag->bonded = false;
+ lag->peer_netdev = NULL;
+ lag->upper_netdev = NULL;
+ lag->notif_block.notifier_call = NULL;
+
+ err = ice_register_lag_handler(lag);
+ if (err) {
+ dev_warn(dev, "INIT LAG: Failed to register event handler\n");
+ goto lag_error;
+ }
+
+ ice_display_lag_info(lag);
+
+ dev_dbg(dev, "INIT LAG complete\n");
+ return 0;
+
+lag_error:
+ kfree(lag);
+ pf->lag = NULL;
+ return err;
+}
+
+/**
+ * ice_deinit_lag - Clean up LAG
+ * @pf: PF struct
+ *
+ * Clean up kernel LAG info and free memory
+ * This function is meant to only be called on driver remove/shutdown
+ */
+void ice_deinit_lag(struct ice_pf *pf)
+{
+ struct ice_lag *lag;
+
+ lag = pf->lag;
+
+ if (!lag)
+ return;
+
+ if (lag->pf)
+ ice_unregister_lag_handler(lag);
+
+ dev_put(lag->upper_netdev);
+
+ dev_put(lag->peer_netdev);
+
+ kfree(lag);
+
+ pf->lag = NULL;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h
new file mode 100644
index 000000000..51b5cf467
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lag.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_LAG_H_
+#define _ICE_LAG_H_
+
+#include <linux/netdevice.h>
+
+/* LAG roles for netdev */
+enum ice_lag_role {
+ ICE_LAG_NONE,
+ ICE_LAG_PRIMARY,
+ ICE_LAG_BACKUP,
+ ICE_LAG_UNSET
+};
+
+struct ice_pf;
+
+/* LAG info struct */
+struct ice_lag {
+ struct ice_pf *pf; /* backlink to PF struct */
+ struct net_device *netdev; /* this PF's netdev */
+ struct net_device *peer_netdev;
+ struct net_device *upper_netdev; /* upper bonding netdev */
+ struct notifier_block notif_block;
+ u8 bonded:1; /* currently bonded */
+ u8 primary:1; /* this is primary */
+ u8 handler:1; /* did we register a rx_netdev_handler */
+	/* each feature that blocks bonding increments this value by one.
+ * If this value is zero, then bonding is allowed.
+ */
+ u16 dis_lag;
+ u8 role;
+};
+
+int ice_init_lag(struct ice_pf *pf);
+void ice_deinit_lag(struct ice_pf *pf);
+rx_handler_result_t ice_lag_nop_handler(struct sk_buff **pskb);
+
+/**
+ * ice_disable_lag - increment LAG disable count
+ * @lag: LAG struct
+ */
+static inline void ice_disable_lag(struct ice_lag *lag)
+{
+	/* If LAG on this PF is not already disabled, disable it */
+ rtnl_lock();
+ if (!netdev_is_rx_handler_busy(lag->netdev)) {
+ if (!netdev_rx_handler_register(lag->netdev,
+ ice_lag_nop_handler,
+ NULL))
+ lag->handler = true;
+ }
+ rtnl_unlock();
+ lag->dis_lag++;
+}
+
+/**
+ * ice_enable_lag - decrement disable count for a PF
+ * @lag: LAG struct
+ *
+ * Decrement the disable counter for a port, and if that count reaches
+ * zero, then remove the no-op Rx handler from that netdev
+ */
+static inline void ice_enable_lag(struct ice_lag *lag)
+{
+ if (lag->dis_lag)
+ lag->dis_lag--;
+ if (!lag->dis_lag && lag->handler) {
+ rtnl_lock();
+ netdev_rx_handler_unregister(lag->netdev);
+ rtnl_unlock();
+ lag->handler = false;
+ }
+}
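+
+/* Illustrative pairing (sketch, not a call site in this file): a flow
+ * that must not race with bonding brackets its work as
+ *
+ *	ice_disable_lag(pf->lag);
+ *	... work incompatible with LAG ...
+ *	ice_enable_lag(pf->lag);
+ */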
+
+/**
+ * ice_is_lag_dis - is LAG disabled
+ * @lag: LAG struct
+ *
+ * Return true if bonding is disabled
+ */
+static inline bool ice_is_lag_dis(struct ice_lag *lag)
+{
+ return !!(lag->dis_lag);
+}
+#endif /* _ICE_LAG_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
new file mode 100644
index 000000000..b3baf7c3f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -0,0 +1,924 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_LAN_TX_RX_H_
+#define _ICE_LAN_TX_RX_H_
+
+union ice_32byte_rx_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ /* bit 0 of hdr_addr is DD bit */
+ __le64 rsvd1;
+ __le64 rsvd2;
+ } read;
+ struct {
+ struct {
+ struct {
+ __le16 mirroring_status;
+ __le16 l2tag1;
+ } lo_dword;
+ union {
+ __le32 rss; /* RSS Hash */
+ __le32 fd_id; /* Flow Director filter ID */
+ } hi_dword;
+ } qword0;
+ struct {
+ /* status/error/PTYPE/length */
+ __le64 status_error_len;
+ } qword1;
+ struct {
+ __le16 ext_status; /* extended status */
+ __le16 rsvd;
+ __le16 l2tag2_1;
+ __le16 l2tag2_2;
+ } qword2;
+ struct {
+ __le32 reserved;
+ __le32 fd_id;
+ } qword3;
+ } wb; /* writeback */
+};
+
+struct ice_fltr_desc {
+ __le64 qidx_compq_space_stat;
+ __le64 dtype_cmd_vsi_fdid;
+};
+
+#define ICE_FXD_FLTR_QW0_QINDEX_S 0
+#define ICE_FXD_FLTR_QW0_QINDEX_M (0x7FFULL << ICE_FXD_FLTR_QW0_QINDEX_S)
+#define ICE_FXD_FLTR_QW0_COMP_Q_S 11
+#define ICE_FXD_FLTR_QW0_COMP_Q_M BIT_ULL(ICE_FXD_FLTR_QW0_COMP_Q_S)
+#define ICE_FXD_FLTR_QW0_COMP_Q_ZERO 0x0ULL
+
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_S 12
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_M \
+ (0x3ULL << ICE_FXD_FLTR_QW0_COMP_REPORT_S)
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL 0x1ULL
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_SW 0x2ULL
+
+#define ICE_FXD_FLTR_QW0_FD_SPACE_S 14
+#define ICE_FXD_FLTR_QW0_FD_SPACE_M (0x3ULL << ICE_FXD_FLTR_QW0_FD_SPACE_S)
+#define ICE_FXD_FLTR_QW0_FD_SPACE_GUAR_BEST 0x2ULL
+
+#define ICE_FXD_FLTR_QW0_STAT_CNT_S 16
+#define ICE_FXD_FLTR_QW0_STAT_CNT_M \
+ (0x1FFFULL << ICE_FXD_FLTR_QW0_STAT_CNT_S)
+#define ICE_FXD_FLTR_QW0_STAT_ENA_S 29
+#define ICE_FXD_FLTR_QW0_STAT_ENA_M (0x3ULL << ICE_FXD_FLTR_QW0_STAT_ENA_S)
+#define ICE_FXD_FLTR_QW0_STAT_ENA_PKTS 0x1ULL
+
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_S 31
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_M BIT_ULL(ICE_FXD_FLTR_QW0_EVICT_ENA_S)
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE 0x0ULL
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_TRUE 0x1ULL
+
+#define ICE_FXD_FLTR_QW0_TO_Q_S 32
+#define ICE_FXD_FLTR_QW0_TO_Q_M (0x7ULL << ICE_FXD_FLTR_QW0_TO_Q_S)
+#define ICE_FXD_FLTR_QW0_TO_Q_EQUALS_QINDEX 0x0ULL
+
+#define ICE_FXD_FLTR_QW0_TO_Q_PRI_S 35
+#define ICE_FXD_FLTR_QW0_TO_Q_PRI_M (0x7ULL << ICE_FXD_FLTR_QW0_TO_Q_PRI_S)
+#define ICE_FXD_FLTR_QW0_TO_Q_PRIO1 0x1ULL
+
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_S 38
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_M \
+ (0x3ULL << ICE_FXD_FLTR_QW0_DPU_RECIPE_S)
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_DFLT 0x0ULL
+
+#define ICE_FXD_FLTR_QW0_DROP_S 40
+#define ICE_FXD_FLTR_QW0_DROP_M BIT_ULL(ICE_FXD_FLTR_QW0_DROP_S)
+#define ICE_FXD_FLTR_QW0_DROP_NO 0x0ULL
+#define ICE_FXD_FLTR_QW0_DROP_YES 0x1ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_S 41
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_M (0x7ULL << ICE_FXD_FLTR_QW0_FLEX_PRI_S)
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_NONE 0x0ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_MDID_S 44
+#define ICE_FXD_FLTR_QW0_FLEX_MDID_M (0xFULL << ICE_FXD_FLTR_QW0_FLEX_MDID_S)
+#define ICE_FXD_FLTR_QW0_FLEX_MDID0 0x0ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_VAL_S 48
+#define ICE_FXD_FLTR_QW0_FLEX_VAL_M \
+ (0xFFFFULL << ICE_FXD_FLTR_QW0_FLEX_VAL_S)
+#define ICE_FXD_FLTR_QW0_FLEX_VAL0 0x0ULL
+
+#define ICE_FXD_FLTR_QW1_DTYPE_S 0
+#define ICE_FXD_FLTR_QW1_DTYPE_M (0xFULL << ICE_FXD_FLTR_QW1_DTYPE_S)
+#define ICE_FXD_FLTR_QW1_PCMD_S 4
+#define ICE_FXD_FLTR_QW1_PCMD_M BIT_ULL(ICE_FXD_FLTR_QW1_PCMD_S)
+#define ICE_FXD_FLTR_QW1_PCMD_ADD 0x0ULL
+#define ICE_FXD_FLTR_QW1_PCMD_REMOVE 0x1ULL
+
+#define ICE_FXD_FLTR_QW1_PROF_PRI_S 5
+#define ICE_FXD_FLTR_QW1_PROF_PRI_M (0x7ULL << ICE_FXD_FLTR_QW1_PROF_PRI_S)
+#define ICE_FXD_FLTR_QW1_PROF_PRIO_ZERO 0x0ULL
+
+#define ICE_FXD_FLTR_QW1_PROF_S 8
+#define ICE_FXD_FLTR_QW1_PROF_M (0x3FULL << ICE_FXD_FLTR_QW1_PROF_S)
+#define ICE_FXD_FLTR_QW1_PROF_ZERO 0x0ULL
+
+#define ICE_FXD_FLTR_QW1_FD_VSI_S 14
+#define ICE_FXD_FLTR_QW1_FD_VSI_M (0x3FFULL << ICE_FXD_FLTR_QW1_FD_VSI_S)
+#define ICE_FXD_FLTR_QW1_SWAP_S 24
+#define ICE_FXD_FLTR_QW1_SWAP_M BIT_ULL(ICE_FXD_FLTR_QW1_SWAP_S)
+#define ICE_FXD_FLTR_QW1_SWAP_NOT_SET 0x0ULL
+#define ICE_FXD_FLTR_QW1_SWAP_SET 0x1ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_PRI_S 25
+#define ICE_FXD_FLTR_QW1_FDID_PRI_M (0x7ULL << ICE_FXD_FLTR_QW1_FDID_PRI_S)
+#define ICE_FXD_FLTR_QW1_FDID_PRI_ONE 0x1ULL
+#define ICE_FXD_FLTR_QW1_FDID_PRI_THREE 0x3ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_MDID_S 28
+#define ICE_FXD_FLTR_QW1_FDID_MDID_M (0xFULL << ICE_FXD_FLTR_QW1_FDID_MDID_S)
+#define ICE_FXD_FLTR_QW1_FDID_MDID_FD 0x05ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_S 32
+#define ICE_FXD_FLTR_QW1_FDID_M \
+ (0xFFFFFFFFULL << ICE_FXD_FLTR_QW1_FDID_S)
+#define ICE_FXD_FLTR_QW1_FDID_ZERO 0x0ULL
+
+/* definition for FD filter programming status descriptor WB format */
+#define ICE_FXD_FLTR_WB_QW1_DD_S 0
+#define ICE_FXD_FLTR_WB_QW1_DD_M (0x1ULL << ICE_FXD_FLTR_WB_QW1_DD_S)
+#define ICE_FXD_FLTR_WB_QW1_DD_YES 0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_PROG_ID_S 1
+#define ICE_FXD_FLTR_WB_QW1_PROG_ID_M \
+ (0x3ULL << ICE_FXD_FLTR_WB_QW1_PROG_ID_S)
+#define ICE_FXD_FLTR_WB_QW1_PROG_ADD 0x0ULL
+#define ICE_FXD_FLTR_WB_QW1_PROG_DEL 0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_FAIL_S 4
+#define ICE_FXD_FLTR_WB_QW1_FAIL_M (0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_S)
+#define ICE_FXD_FLTR_WB_QW1_FAIL_YES 0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S 5
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M \
+ (0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S)
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES 0x1ULL
+
+struct ice_rx_ptype_decoded {
+ u32 known:1;
+ u32 outer_ip:1;
+ u32 outer_ip_ver:2;
+ u32 outer_frag:1;
+ u32 tunnel_type:3;
+ u32 tunnel_end_prot:2;
+ u32 tunnel_end_frag:1;
+ u32 inner_prot:4;
+ u32 payload_layer:3;
+};
+
+enum ice_rx_ptype_outer_ip {
+ ICE_RX_PTYPE_OUTER_L2 = 0,
+ ICE_RX_PTYPE_OUTER_IP = 1,
+};
+
+enum ice_rx_ptype_outer_ip_ver {
+ ICE_RX_PTYPE_OUTER_NONE = 0,
+ ICE_RX_PTYPE_OUTER_IPV4 = 1,
+ ICE_RX_PTYPE_OUTER_IPV6 = 2,
+};
+
+enum ice_rx_ptype_outer_fragmented {
+ ICE_RX_PTYPE_NOT_FRAG = 0,
+ ICE_RX_PTYPE_FRAG = 1,
+};
+
+enum ice_rx_ptype_tunnel_type {
+ ICE_RX_PTYPE_TUNNEL_NONE = 0,
+ ICE_RX_PTYPE_TUNNEL_IP_IP = 1,
+ ICE_RX_PTYPE_TUNNEL_IP_GRENAT = 2,
+ ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3,
+ ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4,
+};
+
+enum ice_rx_ptype_tunnel_end_prot {
+ ICE_RX_PTYPE_TUNNEL_END_NONE = 0,
+ ICE_RX_PTYPE_TUNNEL_END_IPV4 = 1,
+ ICE_RX_PTYPE_TUNNEL_END_IPV6 = 2,
+};
+
+enum ice_rx_ptype_inner_prot {
+ ICE_RX_PTYPE_INNER_PROT_NONE = 0,
+ ICE_RX_PTYPE_INNER_PROT_UDP = 1,
+ ICE_RX_PTYPE_INNER_PROT_TCP = 2,
+ ICE_RX_PTYPE_INNER_PROT_SCTP = 3,
+ ICE_RX_PTYPE_INNER_PROT_ICMP = 4,
+ ICE_RX_PTYPE_INNER_PROT_TIMESYNC = 5,
+};
+
+enum ice_rx_ptype_payload_layer {
+ ICE_RX_PTYPE_PAYLOAD_LAYER_NONE = 0,
+ ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1,
+ ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2,
+ ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3,
+};
+
+/* Rx Flex Descriptor
+ * This descriptor is used instead of the legacy version descriptor when
+ * ice_rlan_ctx.adv_desc is set
+ */
+union ice_32b_rx_flex_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ /* bit 0 of hdr_addr is DD bit */
+ __le64 rsvd1;
+ __le64 rsvd2;
+ } read;
+ struct {
+ /* Qword 0 */
+ u8 rxdid; /* descriptor builder profile ID */
+ u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+ __le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+ __le16 pkt_len; /* [15:14] are reserved */
+ __le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+ /* sph=[11:11] */
+ /* ff1/ext=[15:12] */
+
+ /* Qword 1 */
+ __le16 status_error0;
+ __le16 l2tag1;
+ __le16 flex_meta0;
+ __le16 flex_meta1;
+
+ /* Qword 2 */
+ __le16 status_error1;
+ u8 flex_flags2;
+ u8 time_stamp_low;
+ __le16 l2tag2_1st;
+ __le16 l2tag2_2nd;
+
+ /* Qword 3 */
+ __le16 flex_meta2;
+ __le16 flex_meta3;
+ union {
+ struct {
+ __le16 flex_meta4;
+ __le16 flex_meta5;
+ } flex;
+ __le32 ts_high;
+ } flex_ts;
+ } wb; /* writeback */
+};
+
+/* Rx Flex Descriptor NIC Profile
+ * This descriptor corresponds to RxDID 2 which contains
+ * metadata fields for RSS, flow ID and timestamp info
+ */
+struct ice_32b_rx_flex_desc_nic {
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ __le16 ptype_flexi_flags0;
+ __le16 pkt_len;
+ __le16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ __le16 status_error0;
+ __le16 l2tag1;
+ __le32 rss_hash;
+
+ /* Qword 2 */
+ __le16 status_error1;
+ u8 flexi_flags2;
+ u8 ts_low;
+ __le16 l2tag2_1st;
+ __le16 l2tag2_2nd;
+
+ /* Qword 3 */
+ __le32 flow_id;
+ union {
+ struct {
+ __le16 vlan_id;
+ __le16 flow_id_ipv6;
+ } flex;
+ __le32 ts_high;
+ } flex_ts;
+};
+
+/* Rx Flex Descriptor NIC Profile
+ * RxDID Profile ID 6
+ * Flex-field 0: RSS hash lower 16-bits
+ * Flex-field 1: RSS hash upper 16-bits
+ * Flex-field 2: Flow ID lower 16-bits
+ * Flex-field 3: Source VSI
+ * Flex-field 4: reserved, VLAN ID taken from L2Tag
+ */
+struct ice_32b_rx_flex_desc_nic_2 {
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ __le16 ptype_flexi_flags0;
+ __le16 pkt_len;
+ __le16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ __le16 status_error0;
+ __le16 l2tag1;
+ __le32 rss_hash;
+
+ /* Qword 2 */
+ __le16 status_error1;
+ u8 flexi_flags2;
+ u8 ts_low;
+ __le16 l2tag2_1st;
+ __le16 l2tag2_2nd;
+
+ /* Qword 3 */
+ __le16 flow_id;
+ __le16 src_vsi;
+ union {
+ struct {
+ __le16 rsvd;
+ __le16 flow_id_ipv6;
+ } flex;
+ __le32 ts_high;
+ } flex_ts;
+};
+
+/* Receive Flex Descriptor profile IDs: There are a total
+ * of 64 profiles where profile IDs 0/1 are for legacy, and
+ * profiles 2-63 are flex profiles that can be programmed
+ * with specific metadata (profile 7 is reserved for HW)
+ */
+enum ice_rxdid {
+ ICE_RXDID_LEGACY_0 = 0,
+ ICE_RXDID_LEGACY_1 = 1,
+ ICE_RXDID_FLEX_NIC = 2,
+ ICE_RXDID_FLEX_NIC_2 = 6,
+ ICE_RXDID_HW = 7,
+ ICE_RXDID_LAST = 63,
+};
+
+/* Receive Flex Descriptor Rx opcode values */
+#define ICE_RX_OPC_MDID 0x01
+
+/* Receive Descriptor MDID values that access packet flags */
+enum ice_flex_mdid_pkt_flags {
+ ICE_RX_MDID_PKT_FLAGS_15_0 = 20,
+ ICE_RX_MDID_PKT_FLAGS_31_16,
+ ICE_RX_MDID_PKT_FLAGS_47_32,
+ ICE_RX_MDID_PKT_FLAGS_63_48,
+};
+
+/* Receive Descriptor MDID values */
+enum ice_flex_rx_mdid {
+ ICE_RX_MDID_FLOW_ID_LOWER = 5,
+ ICE_RX_MDID_FLOW_ID_HIGH,
+ ICE_RX_MDID_SRC_VSI = 19,
+ ICE_RX_MDID_HASH_LOW = 56,
+ ICE_RX_MDID_HASH_HIGH,
+};
+
+/* Rx/Tx Flag64 packet flag bits */
+enum ice_flg64_bits {
+ ICE_FLG_PKT_DSI = 0,
+ ICE_FLG_EVLAN_x8100 = 14,
+ ICE_FLG_EVLAN_x9100,
+ ICE_FLG_VLAN_x8100,
+ ICE_FLG_TNL_MAC = 22,
+ ICE_FLG_TNL_VLAN,
+ ICE_FLG_PKT_FRG,
+ ICE_FLG_FIN = 32,
+ ICE_FLG_SYN,
+ ICE_FLG_RST,
+ ICE_FLG_TNL0 = 38,
+ ICE_FLG_TNL1,
+ ICE_FLG_TNL2,
+ ICE_FLG_UDP_GRE,
+ ICE_FLG_RSVD = 63
+};
+
+/* for ice_32byte_rx_flex_desc.ptype_flexi_flags0 member */
+#define ICE_RX_FLEX_DESC_PTYPE_M (0x3FF) /* 10-bits */
+
+/* for ice_32b_rx_flex_desc.wb.pkt_len member */
+#define ICE_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */
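+
+/* Extraction sketch: the Rx hot path pulls the 10-bit ptype out of the
+ * writeback descriptor roughly as follows (see ice_txrx_lib.c):
+ *
+ *	u16 ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+ *		    ICE_RX_FLEX_DESC_PTYPE_M;
+ */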
+
+enum ice_rx_flex_desc_status_error_0_bits {
+ /* Note: These are predefined bit offsets */
+ ICE_RX_FLEX_DESC_STATUS0_DD_S = 0,
+ ICE_RX_FLEX_DESC_STATUS0_EOF_S,
+ ICE_RX_FLEX_DESC_STATUS0_HBO_S,
+ ICE_RX_FLEX_DESC_STATUS0_L3L4P_S,
+ ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,
+ ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,
+ ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,
+ ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,
+ ICE_RX_FLEX_DESC_STATUS0_LPBK_S,
+ ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,
+ ICE_RX_FLEX_DESC_STATUS0_RXE_S,
+ ICE_RX_FLEX_DESC_STATUS0_CRCP_S,
+ ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S,
+ ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S,
+ ICE_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,
+ ICE_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,
+ ICE_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
+};
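+
+/* Example (a sketch): SW owns a descriptor once HW has set the DD bit in
+ * status_error0:
+ *
+ *	if (le16_to_cpu(rx_desc->status_error0) &
+ *	    BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S))
+ *		process the completed descriptor;
+ */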
+
+enum ice_rx_flex_desc_status_error_1_bits {
+ /* Note: These are predefined bit offsets */
+ ICE_RX_FLEX_DESC_STATUS1_NAT_S = 4,
+ /* [10:5] reserved */
+ ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S = 11,
+ ICE_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */
+};
+
+#define ICE_RXQ_CTX_SIZE_DWORDS 8
+#define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
+#define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS 22
+#define ICE_TX_DRBELL_Q_CTX_SIZE_DWORDS 5
+#define GLTCLAN_CQ_CNTX(i, CQ) (GLTCLAN_CQ_CNTX0(CQ) + ((i) * 0x0800))
+
+/* RLAN Rx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct ice_rlan_ctx {
+ u16 head;
+ u16 cpuid; /* bigger than needed, see above for reason */
+#define ICE_RLAN_BASE_S 7
+ u64 base;
+ u16 qlen;
+#define ICE_RLAN_CTX_DBUF_S 7
+ u16 dbuf; /* bigger than needed, see above for reason */
+#define ICE_RLAN_CTX_HBUF_S 6
+ u16 hbuf; /* bigger than needed, see above for reason */
+ u8 dtype;
+ u8 dsize;
+ u8 crcstrip;
+ u8 l2tsel;
+ u8 hsplit_0;
+ u8 hsplit_1;
+ u8 showiv;
+ u32 rxmax; /* bigger than needed, see above for reason */
+ u8 tphrdesc_ena;
+ u8 tphwdesc_ena;
+ u8 tphdata_ena;
+ u8 tphhead_ena;
+ u16 lrxqthresh; /* bigger than needed, see above for reason */
+ u8 prefena; /* NOTE: normally must be set to 1 at init */
+};
+
+struct ice_ctx_ele {
+ u16 offset;
+ u16 size_of;
+ u16 width;
+ u16 lsb;
+};
+
+#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \
+ .offset = offsetof(struct _struct, _ele), \
+ .size_of = sizeof_field(struct _struct, _ele), \
+ .width = _width, \
+ .lsb = _lsb, \
+}
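+
+/* Illustrative ICE_CTX_STORE usage (a sketch): a packing table that maps
+ * struct members to their width/LSB in the HW context image. The widths
+ * and LSBs below are examples only; the authoritative tables live with the
+ * context-packing code.
+ *
+ *	static const struct ice_ctx_ele example_rlan_ctx_info[] = {
+ *		ICE_CTX_STORE(ice_rlan_ctx, head,  13, 0),
+ *		ICE_CTX_STORE(ice_rlan_ctx, cpuid,  8, 13),
+ *		{ 0 },
+ *	};
+ */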
+
+/* for hsplit_0 field of Rx RLAN context */
+enum ice_rlan_ctx_rx_hsplit_0 {
+ ICE_RLAN_RX_HSPLIT_0_NO_SPLIT = 0,
+ ICE_RLAN_RX_HSPLIT_0_SPLIT_L2 = 1,
+ ICE_RLAN_RX_HSPLIT_0_SPLIT_IP = 2,
+ ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP = 4,
+ ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP = 8,
+};
+
+/* for hsplit_1 field of Rx RLAN context */
+enum ice_rlan_ctx_rx_hsplit_1 {
+ ICE_RLAN_RX_HSPLIT_1_NO_SPLIT = 0,
+ ICE_RLAN_RX_HSPLIT_1_SPLIT_L2 = 1,
+ ICE_RLAN_RX_HSPLIT_1_SPLIT_ALWAYS = 2,
+};
+
+/* Tx Descriptor */
+struct ice_tx_desc {
+ __le64 buf_addr; /* Address of descriptor's data buf */
+ __le64 cmd_type_offset_bsz;
+};
+
+enum ice_tx_desc_dtype_value {
+ ICE_TX_DESC_DTYPE_DATA = 0x0,
+ ICE_TX_DESC_DTYPE_CTX = 0x1,
+ ICE_TX_DESC_DTYPE_FLTR_PROG = 0x8,
+ /* DESC_DONE - HW has completed write-back of descriptor */
+ ICE_TX_DESC_DTYPE_DESC_DONE = 0xF,
+};
+
+#define ICE_TXD_QW1_CMD_S 4
+#define ICE_TXD_QW1_CMD_M (0xFFFUL << ICE_TXD_QW1_CMD_S)
+
+enum ice_tx_desc_cmd_bits {
+ ICE_TX_DESC_CMD_EOP = 0x0001,
+ ICE_TX_DESC_CMD_RS = 0x0002,
+ ICE_TX_DESC_CMD_IL2TAG1 = 0x0008,
+ ICE_TX_DESC_CMD_DUMMY = 0x0010,
+ ICE_TX_DESC_CMD_IIPT_IPV6 = 0x0020,
+ ICE_TX_DESC_CMD_IIPT_IPV4 = 0x0040,
+ ICE_TX_DESC_CMD_IIPT_IPV4_CSUM = 0x0060,
+ ICE_TX_DESC_CMD_L4T_EOFT_TCP = 0x0100,
+ ICE_TX_DESC_CMD_L4T_EOFT_SCTP = 0x0200,
+ ICE_TX_DESC_CMD_L4T_EOFT_UDP = 0x0300,
+ ICE_TX_DESC_CMD_RE = 0x0400,
+};
+
+#define ICE_TXD_QW1_OFFSET_S 16
+#define ICE_TXD_QW1_OFFSET_M (0x3FFFFULL << ICE_TXD_QW1_OFFSET_S)
+
+enum ice_tx_desc_len_fields {
+ /* Note: These are predefined bit offsets */
+ ICE_TX_DESC_LEN_MACLEN_S = 0, /* 7 BITS */
+ ICE_TX_DESC_LEN_IPLEN_S = 7, /* 7 BITS */
+ ICE_TX_DESC_LEN_L4_LEN_S = 14 /* 4 BITS */
+};
+
+#define ICE_TXD_QW1_MACLEN_M (0x7FUL << ICE_TX_DESC_LEN_MACLEN_S)
+#define ICE_TXD_QW1_IPLEN_M (0x7FUL << ICE_TX_DESC_LEN_IPLEN_S)
+#define ICE_TXD_QW1_L4LEN_M (0xFUL << ICE_TX_DESC_LEN_L4_LEN_S)
+
+/* Tx descriptor field limits in bytes */
+#define ICE_TXD_MACLEN_MAX ((ICE_TXD_QW1_MACLEN_M >> \
+ ICE_TX_DESC_LEN_MACLEN_S) * ICE_BYTES_PER_WORD)
+#define ICE_TXD_IPLEN_MAX ((ICE_TXD_QW1_IPLEN_M >> \
+ ICE_TX_DESC_LEN_IPLEN_S) * ICE_BYTES_PER_DWORD)
+#define ICE_TXD_L4LEN_MAX ((ICE_TXD_QW1_L4LEN_M >> \
+ ICE_TX_DESC_LEN_L4_LEN_S) * ICE_BYTES_PER_DWORD)
+
+#define ICE_TXD_QW1_TX_BUF_SZ_S 34
+#define ICE_TXD_QW1_L2TAG1_S 48
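+
+/* Illustrative composition of cmd_type_offset_bsz for a data descriptor
+ * (a sketch; td_cmd, td_offset, size and td_tag are caller-supplied):
+ *
+ *	desc->cmd_type_offset_bsz =
+ *		cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+ *			    (td_cmd << ICE_TXD_QW1_CMD_S) |
+ *			    (td_offset << ICE_TXD_QW1_OFFSET_S) |
+ *			    ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+ *			    (td_tag << ICE_TXD_QW1_L2TAG1_S));
+ */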
+
+/* Context descriptors */
+struct ice_tx_ctx_desc {
+ __le32 tunneling_params;
+ __le16 l2tag2;
+ __le16 rsvd;
+ __le64 qw1;
+};
+
+#define ICE_TXD_CTX_QW1_CMD_S 4
+#define ICE_TXD_CTX_QW1_CMD_M (0x7FUL << ICE_TXD_CTX_QW1_CMD_S)
+
+#define ICE_TXD_CTX_QW1_TSO_LEN_S 30
+#define ICE_TXD_CTX_QW1_TSO_LEN_M \
+ (0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S)
+
+#define ICE_TXD_CTX_QW1_MSS_S 50
+#define ICE_TXD_CTX_MIN_MSS 64
+
+#define ICE_TXD_CTX_QW1_VSI_S 50
+#define ICE_TXD_CTX_QW1_VSI_M (0x3FFULL << ICE_TXD_CTX_QW1_VSI_S)
+
+enum ice_tx_ctx_desc_cmd_bits {
+ ICE_TX_CTX_DESC_TSO = 0x01,
+ ICE_TX_CTX_DESC_TSYN = 0x02,
+ ICE_TX_CTX_DESC_IL2TAG2 = 0x04,
+ ICE_TX_CTX_DESC_IL2TAG2_IL2H = 0x08,
+ ICE_TX_CTX_DESC_SWTCH_NOTAG = 0x00,
+ ICE_TX_CTX_DESC_SWTCH_UPLINK = 0x10,
+ ICE_TX_CTX_DESC_SWTCH_LOCAL = 0x20,
+ ICE_TX_CTX_DESC_SWTCH_VSI = 0x30,
+ ICE_TX_CTX_DESC_RESERVED = 0x40
+};
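+
+/* Illustrative TSO context descriptor QW1 (a sketch; tso_len and mss are
+ * caller-supplied):
+ *
+ *	ctx_desc->qw1 =
+ *		cpu_to_le64(ICE_TX_DESC_DTYPE_CTX |
+ *			    ((u64)ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
+ *			    ((u64)tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
+ *			    ((u64)mss << ICE_TXD_CTX_QW1_MSS_S));
+ */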
+
+enum ice_tx_ctx_desc_eipt_offload {
+ ICE_TX_CTX_EIPT_NONE = 0x0,
+ ICE_TX_CTX_EIPT_IPV6 = 0x1,
+ ICE_TX_CTX_EIPT_IPV4_NO_CSUM = 0x2,
+ ICE_TX_CTX_EIPT_IPV4 = 0x3
+};
+
+#define ICE_TXD_CTX_QW0_EIPLEN_S 2
+
+#define ICE_TXD_CTX_QW0_L4TUNT_S 9
+
+#define ICE_TXD_CTX_UDP_TUNNELING BIT_ULL(ICE_TXD_CTX_QW0_L4TUNT_S)
+#define ICE_TXD_CTX_GRE_TUNNELING (0x2ULL << ICE_TXD_CTX_QW0_L4TUNT_S)
+
+#define ICE_TXD_CTX_QW0_NATLEN_S 12
+
+#define ICE_TXD_CTX_QW0_L4T_CS_S 23
+#define ICE_TXD_CTX_QW0_L4T_CS_M BIT_ULL(ICE_TXD_CTX_QW0_L4T_CS_S)
+
+#define ICE_LAN_TXQ_MAX_QGRPS 127
+#define ICE_LAN_TXQ_MAX_QDIS 1023
+
+/* Tx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct ice_tlan_ctx {
+#define ICE_TLAN_CTX_BASE_S 7
+ u64 base; /* base is defined in 128-byte units */
+ u8 port_num;
+ u16 cgd_num; /* bigger than needed, see above for reason */
+ u8 pf_num;
+ u16 vmvf_num;
+ u8 vmvf_type;
+#define ICE_TLAN_CTX_VMVF_TYPE_VF 0
+#define ICE_TLAN_CTX_VMVF_TYPE_VMQ 1
+#define ICE_TLAN_CTX_VMVF_TYPE_PF 2
+ u16 src_vsi;
+ u8 tsyn_ena;
+ u8 internal_usage_flag;
+ u8 alt_vlan;
+ u16 cpuid; /* bigger than needed, see above for reason */
+ u8 wb_mode;
+ u8 tphrd_desc;
+ u8 tphrd;
+ u8 tphwr_desc;
+ u16 cmpq_id;
+ u16 qnum_in_func;
+ u8 itr_notification_mode;
+ u8 adjust_prof_id;
+ u32 qlen; /* bigger than needed, see above for reason */
+ u8 quanta_prof_idx;
+ u8 tso_ena;
+ u16 tso_qnum;
+ u8 legacy_int;
+ u8 drop_ena;
+ u8 cache_prof_idx;
+ u8 pkt_shaper_prof_idx;
+ u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */
+};
+
+/* The ice_ptype_lkup table is used to convert from the 10-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT ice_ptype_lkup[ptype].known
+ * THEN
+ * Packet is unknown
+ * ELSE IF ice_ptype_lkup[ptype].outer_ip == ICE_RX_PTYPE_OUTER_IP
+ * Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ * Use the enum ice_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short, use explicit indexing with [PTYPE] */
+#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+ [PTYPE] = { \
+ 1, \
+ ICE_RX_PTYPE_OUTER_##OUTER_IP, \
+ ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+ ICE_RX_PTYPE_##OUTER_FRAG, \
+ ICE_RX_PTYPE_TUNNEL_##T, \
+ ICE_RX_PTYPE_TUNNEL_END_##TE, \
+ ICE_RX_PTYPE_##TEF, \
+ ICE_RX_PTYPE_INNER_PROT_##I, \
+ ICE_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros makes the table fit but are terse */
+#define ICE_RX_PTYPE_NOF ICE_RX_PTYPE_NOT_FRAG
+#define ICE_RX_PTYPE_FRG ICE_RX_PTYPE_FRAG
+
+/* Lookup table mapping in the 10-bit HW PTYPE to the bit field for decoding */
+static const struct ice_rx_ptype_decoded ice_ptype_lkup[BIT(10)] = {
+ /* L2 Packet types */
+ ICE_PTT_UNUSED_ENTRY(0),
+ ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ ICE_PTT_UNUSED_ENTRY(2),
+ ICE_PTT_UNUSED_ENTRY(3),
+ ICE_PTT_UNUSED_ENTRY(4),
+ ICE_PTT_UNUSED_ENTRY(5),
+ ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+ ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+ ICE_PTT_UNUSED_ENTRY(8),
+ ICE_PTT_UNUSED_ENTRY(9),
+ ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+ ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+ ICE_PTT_UNUSED_ENTRY(12),
+ ICE_PTT_UNUSED_ENTRY(13),
+ ICE_PTT_UNUSED_ENTRY(14),
+ ICE_PTT_UNUSED_ENTRY(15),
+ ICE_PTT_UNUSED_ENTRY(16),
+ ICE_PTT_UNUSED_ENTRY(17),
+ ICE_PTT_UNUSED_ENTRY(18),
+ ICE_PTT_UNUSED_ENTRY(19),
+ ICE_PTT_UNUSED_ENTRY(20),
+ ICE_PTT_UNUSED_ENTRY(21),
+
+ /* Non Tunneled IPv4 */
+ ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+ ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+ ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(25),
+ ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4),
+ ICE_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+ ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+ /* IPv4 --> IPv4 */
+ ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+ ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+ ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(32),
+ ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4),
+ ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+ ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> IPv6 */
+ ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+ ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+ ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(39),
+ ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4),
+ ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+ ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT */
+ ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 --> GRE/NAT --> IPv4 */
+ ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+ ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+ ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(47),
+ ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4),
+ ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+ ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> IPv6 */
+ ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+ ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+ ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(54),
+ ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4),
+ ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+ ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> MAC */
+ ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+ ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+ ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+ ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(62),
+ ICE_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4),
+ ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+ ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC --> IPv6 */
+ ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+ ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+ ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(69),
+ ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4),
+ ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+ ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> MAC/VLAN */
+ ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> MAC/VLAN --> IPv4 */
+ ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+ ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+ ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(77),
+ ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4),
+ ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+ ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC/VLAN --> IPv6 */
+ ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+ ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+ ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(84),
+ ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4),
+ ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+ ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+ /* Non Tunneled IPv6 */
+ ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+ ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+ ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(91),
+ ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4),
+ ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+ ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+ /* IPv6 --> IPv4 */
+ ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+ ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+ ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(98),
+ ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4),
+ ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+ ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> IPv6 */
+ ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+ ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+ ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(105),
+ ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4),
+ ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+ ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT */
+ ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> IPv4 */
+ ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+ ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+ ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(113),
+ ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4),
+ ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+ ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> IPv6 */
+ ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+ ICE_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+ ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(120),
+ ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4),
+ ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+ ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC */
+ ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+ ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+ ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+ ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(128),
+ ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4),
+ ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+ ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+ ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+ ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+ ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(135),
+ ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4),
+ ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+ ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN */
+ ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+ ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+ ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+ ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(143),
+ ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4),
+ ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+ ICE_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+ ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+ ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+ ICE_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4),
+ ICE_PTT_UNUSED_ENTRY(150),
+ ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4),
+ ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+ ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+ /* unused entries */
+ [154 ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+};
+
+static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype)
+{
+ return ice_ptype_lkup[ptype];
+}
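+
+/* Example use (a sketch), following the workflow described above:
+ *
+ *	u16 ptype = le16_to_cpu(rx_desc->ptype_flexi_flags0) &
+ *		    ICE_RX_FLEX_DESC_PTYPE_M;
+ *	struct ice_rx_ptype_decoded decoded = ice_decode_rx_desc_ptype(ptype);
+ *
+ *	if (!decoded.known)
+ *		the packet type is unknown;
+ *	else if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP)
+ *		inspect the tunnel and inner protocol fields;
+ */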
+
+#define ICE_LINK_SPEED_UNKNOWN 0
+#define ICE_LINK_SPEED_10MBPS 10
+#define ICE_LINK_SPEED_100MBPS 100
+#define ICE_LINK_SPEED_1000MBPS 1000
+#define ICE_LINK_SPEED_2500MBPS 2500
+#define ICE_LINK_SPEED_5000MBPS 5000
+#define ICE_LINK_SPEED_10000MBPS 10000
+#define ICE_LINK_SPEED_20000MBPS 20000
+#define ICE_LINK_SPEED_25000MBPS 25000
+#define ICE_LINK_SPEED_40000MBPS 40000
+#define ICE_LINK_SPEED_50000MBPS 50000
+#define ICE_LINK_SPEED_100000MBPS 100000
+
+#endif /* _ICE_LAN_TX_RX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
new file mode 100644
index 000000000..c051503c3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -0,0 +1,4298 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_flow.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_dcb_lib.h"
+#include "ice_devlink.h"
+#include "ice_vsi_vlan_ops.h"
+
+/**
+ * ice_vsi_type_str - maps VSI type enum to string equivalents
+ * @vsi_type: VSI type enum
+ */
+const char *ice_vsi_type_str(enum ice_vsi_type vsi_type)
+{
+ switch (vsi_type) {
+ case ICE_VSI_PF:
+ return "ICE_VSI_PF";
+ case ICE_VSI_VF:
+ return "ICE_VSI_VF";
+ case ICE_VSI_CTRL:
+ return "ICE_VSI_CTRL";
+ case ICE_VSI_CHNL:
+ return "ICE_VSI_CHNL";
+ case ICE_VSI_LB:
+ return "ICE_VSI_LB";
+ case ICE_VSI_SWITCHDEV_CTRL:
+ return "ICE_VSI_SWITCHDEV_CTRL";
+ default:
+ return "unknown";
+ }
+}
+
+/**
+ * ice_vsi_ctrl_all_rx_rings - Start or stop a VSI's Rx rings
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ *
+ * First enable/disable all of the Rx rings, flush any remaining writes, and
+ * then verify that they have all been enabled/disabled successfully. This will
+ * let all of the register writes complete when enabling/disabling the Rx rings
+ * before waiting for the change in hardware to complete.
+ */
+static int ice_vsi_ctrl_all_rx_rings(struct ice_vsi *vsi, bool ena)
+{
+ int ret = 0;
+ u16 i;
+
+ ice_for_each_rxq(vsi, i)
+ ice_vsi_ctrl_one_rx_ring(vsi, ena, i, false);
+
+ ice_flush(&vsi->back->hw);
+
+ ice_for_each_rxq(vsi, i) {
+ ret = ice_vsi_wait_one_rx_ring(vsi, ena, i);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the VSI
+ * @vsi: VSI pointer
+ *
+ * On error: returns error code (negative)
+ * On success: returns 0
+ */
+static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
+ if (vsi->type == ICE_VSI_CHNL)
+ return 0;
+
+ /* allocate memory for both Tx and Rx ring pointers */
+ vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq,
+ sizeof(*vsi->tx_rings), GFP_KERNEL);
+ if (!vsi->tx_rings)
+ return -ENOMEM;
+
+ vsi->rx_rings = devm_kcalloc(dev, vsi->alloc_rxq,
+ sizeof(*vsi->rx_rings), GFP_KERNEL);
+ if (!vsi->rx_rings)
+ goto err_rings;
+
+ /* txq_map needs to have enough space to track both Tx (stack) rings
+ * and XDP rings; at this point vsi->num_xdp_txq might not be set,
+	 * so use num_possible_cpus() as we want to always provide an XDP ring
+	 * per CPU, regardless of the queue count settings from the user that
+	 * might have come from ethtool's set_channels() callback.
+ */
+ vsi->txq_map = devm_kcalloc(dev, (vsi->alloc_txq + num_possible_cpus()),
+ sizeof(*vsi->txq_map), GFP_KERNEL);
+
+ if (!vsi->txq_map)
+ goto err_txq_map;
+
+ vsi->rxq_map = devm_kcalloc(dev, vsi->alloc_rxq,
+ sizeof(*vsi->rxq_map), GFP_KERNEL);
+ if (!vsi->rxq_map)
+ goto err_rxq_map;
+
+ /* There is no need to allocate q_vectors for a loopback VSI. */
+ if (vsi->type == ICE_VSI_LB)
+ return 0;
+
+ /* allocate memory for q_vector pointers */
+ vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors,
+ sizeof(*vsi->q_vectors), GFP_KERNEL);
+ if (!vsi->q_vectors)
+ goto err_vectors;
+
+ vsi->af_xdp_zc_qps = bitmap_zalloc(max_t(int, vsi->alloc_txq, vsi->alloc_rxq), GFP_KERNEL);
+ if (!vsi->af_xdp_zc_qps)
+ goto err_zc_qps;
+
+ return 0;
+
+err_zc_qps:
+ devm_kfree(dev, vsi->q_vectors);
+err_vectors:
+ devm_kfree(dev, vsi->rxq_map);
+err_rxq_map:
+ devm_kfree(dev, vsi->txq_map);
+err_txq_map:
+ devm_kfree(dev, vsi->rx_rings);
+err_rings:
+ devm_kfree(dev, vsi->tx_rings);
+ return -ENOMEM;
+}
+
+/**
+ * ice_vsi_set_num_desc - Set number of descriptors for queues on this VSI
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_num_desc(struct ice_vsi *vsi)
+{
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ case ICE_VSI_SWITCHDEV_CTRL:
+ case ICE_VSI_CTRL:
+ case ICE_VSI_LB:
+ /* a user could change the values of num_[tr]x_desc using
+ * ethtool -G so we should keep those values instead of
+ * overwriting them with the defaults.
+ */
+ if (!vsi->num_rx_desc)
+ vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC;
+ if (!vsi->num_tx_desc)
+ vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC;
+ break;
+ default:
+ dev_dbg(ice_pf_to_dev(vsi->back), "Not setting number of Tx/Rx descriptors for VSI type %d\n",
+ vsi->type);
+ break;
+ }
+}
+
+/**
+ * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI
+ * @vsi: the VSI being configured
+ * @vf: the VF associated with this VSI, if any
+ */
+static void ice_vsi_set_num_qs(struct ice_vsi *vsi, struct ice_vf *vf)
+{
+ enum ice_vsi_type vsi_type = vsi->type;
+ struct ice_pf *pf = vsi->back;
+
+ if (WARN_ON(vsi_type == ICE_VSI_VF && !vf))
+ return;
+
+ switch (vsi_type) {
+ case ICE_VSI_PF:
+ if (vsi->req_txq) {
+ vsi->alloc_txq = vsi->req_txq;
+ vsi->num_txq = vsi->req_txq;
+ } else {
+ vsi->alloc_txq = min3(pf->num_lan_msix,
+ ice_get_avail_txq_count(pf),
+ (u16)num_online_cpus());
+ }
+
+ pf->num_lan_tx = vsi->alloc_txq;
+
+ /* only 1 Rx queue unless RSS is enabled */
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ vsi->alloc_rxq = 1;
+ } else {
+ if (vsi->req_rxq) {
+ vsi->alloc_rxq = vsi->req_rxq;
+ vsi->num_rxq = vsi->req_rxq;
+ } else {
+ vsi->alloc_rxq = min3(pf->num_lan_msix,
+ ice_get_avail_rxq_count(pf),
+ (u16)num_online_cpus());
+ }
+ }
+
+ pf->num_lan_rx = vsi->alloc_rxq;
+
+ vsi->num_q_vectors = min_t(int, pf->num_lan_msix,
+ max_t(int, vsi->alloc_rxq,
+ vsi->alloc_txq));
+ break;
+ case ICE_VSI_SWITCHDEV_CTRL:
+		/* The number of queues for the ctrl VSI is equal to the number
+		 * of VFs. Each ring is associated with the corresponding VF_PR
+		 * netdev.
+ */
+ vsi->alloc_txq = ice_get_num_vfs(pf);
+ vsi->alloc_rxq = vsi->alloc_txq;
+ vsi->num_q_vectors = 1;
+ break;
+ case ICE_VSI_VF:
+ if (vf->num_req_qs)
+ vf->num_vf_qs = vf->num_req_qs;
+ vsi->alloc_txq = vf->num_vf_qs;
+ vsi->alloc_rxq = vf->num_vf_qs;
+ /* pf->vfs.num_msix_per includes (VF miscellaneous vector +
+		 * data queue interrupts). Since vsi->num_q_vectors is the
+		 * number of queue vectors, subtract 1 (ICE_NONQ_VECS_VF) from
+		 * the original vector count.
+ */
+ vsi->num_q_vectors = pf->vfs.num_msix_per - ICE_NONQ_VECS_VF;
+ break;
+ case ICE_VSI_CTRL:
+ vsi->alloc_txq = 1;
+ vsi->alloc_rxq = 1;
+ vsi->num_q_vectors = 1;
+ break;
+ case ICE_VSI_CHNL:
+ vsi->alloc_txq = 0;
+ vsi->alloc_rxq = 0;
+ break;
+ case ICE_VSI_LB:
+ vsi->alloc_txq = 1;
+ vsi->alloc_rxq = 1;
+ break;
+ default:
+ dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi_type);
+ break;
+ }
+
+ ice_vsi_set_num_desc(vsi);
+}
+
+/**
+ * ice_get_free_slot - get the next free (NULL) slot index in an array
+ * @array: array to search
+ * @size: size of the array
+ * @curr: last known occupied index to be used as a search hint
+ *
+ * void * is being used to keep the functionality generic. This lets us use this
+ * function on any array of pointers.
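+ *
+ * Example (sketch): with a 3-entry array { ptr, NULL, ptr } and @curr = 0,
+ * this returns index 1; if no free (NULL) slot exists it returns ICE_NO_VSI.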
+ */
+static int ice_get_free_slot(void *array, int size, int curr)
+{
+ int **tmp_array = (int **)array;
+ int next;
+
+ if (curr < (size - 1) && !tmp_array[curr + 1]) {
+ next = curr + 1;
+ } else {
+ int i = 0;
+
+ while ((i < size) && (tmp_array[i]))
+ i++;
+ if (i == size)
+ next = ICE_NO_VSI;
+ else
+ next = i;
+ }
+ return next;
+}
+
+/**
+ * ice_vsi_delete - delete a VSI from the switch
+ * @vsi: pointer to VSI being removed
+ */
+void ice_vsi_delete(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_vsi_ctx *ctxt;
+ int status;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return;
+
+ if (vsi->type == ICE_VSI_VF)
+ ctxt->vf_num = vsi->vf->vf_id;
+ ctxt->vsi_num = vsi->vsi_num;
+
+ memcpy(&ctxt->info, &vsi->info, sizeof(ctxt->info));
+
+ status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL);
+ if (status)
+ dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %d\n",
+ vsi->vsi_num, status);
+
+ kfree(ctxt);
+}
+
+/**
+ * ice_vsi_free_arrays - De-allocate queue and vector pointer arrays for the VSI
+ * @vsi: pointer to VSI being cleared
+ */
+static void ice_vsi_free_arrays(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
+
+ if (vsi->af_xdp_zc_qps) {
+ bitmap_free(vsi->af_xdp_zc_qps);
+ vsi->af_xdp_zc_qps = NULL;
+ }
+ /* free the ring and vector containers */
+ if (vsi->q_vectors) {
+ devm_kfree(dev, vsi->q_vectors);
+ vsi->q_vectors = NULL;
+ }
+ if (vsi->tx_rings) {
+ devm_kfree(dev, vsi->tx_rings);
+ vsi->tx_rings = NULL;
+ }
+ if (vsi->rx_rings) {
+ devm_kfree(dev, vsi->rx_rings);
+ vsi->rx_rings = NULL;
+ }
+ if (vsi->txq_map) {
+ devm_kfree(dev, vsi->txq_map);
+ vsi->txq_map = NULL;
+ }
+ if (vsi->rxq_map) {
+ devm_kfree(dev, vsi->rxq_map);
+ vsi->rxq_map = NULL;
+ }
+}
+
+/**
+ * ice_vsi_clear - clean up and deallocate the provided VSI
+ * @vsi: pointer to VSI being cleared
+ *
+ * This deallocates the VSI's queue resources, removes it from the PF's
+ * VSI array if necessary, and deallocates the VSI
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_vsi_clear(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = NULL;
+ struct device *dev;
+
+ if (!vsi)
+ return 0;
+
+ if (!vsi->back)
+ return -EINVAL;
+
+ pf = vsi->back;
+ dev = ice_pf_to_dev(pf);
+
+ if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) {
+ dev_dbg(dev, "vsi does not exist at pf->vsi[%d]\n", vsi->idx);
+ return -EINVAL;
+ }
+
+ mutex_lock(&pf->sw_mutex);
+ /* updates the PF for this cleared VSI */
+
+ pf->vsi[vsi->idx] = NULL;
+ if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL)
+ pf->next_vsi = vsi->idx;
+ if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL && vsi->vf)
+ pf->next_vsi = vsi->idx;
+
+ ice_vsi_free_arrays(vsi);
+ mutex_unlock(&pf->sw_mutex);
+ devm_kfree(dev, vsi);
+
+ return 0;
+}
+
+/**
+ * ice_msix_clean_ctrl_vsi - MSIX mode interrupt handler for ctrl VSI
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_msix_clean_ctrl_vsi(int __always_unused irq, void *data)
+{
+ struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+
+ if (!q_vector->tx.tx_ring)
+ return IRQ_HANDLED;
+
+#define FDIR_RX_DESC_CLEAN_BUDGET 64
+ ice_clean_rx_irq(q_vector->rx.rx_ring, FDIR_RX_DESC_CLEAN_BUDGET);
+ ice_clean_ctrl_tx_irq(q_vector->tx.tx_ring);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ice_msix_clean_rings - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
+{
+ struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+
+ if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring)
+ return IRQ_HANDLED;
+
+ q_vector->total_events++;
+
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *data)
+{
+ struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+ struct ice_pf *pf = q_vector->vsi->back;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring)
+ return IRQ_HANDLED;
+
+ rcu_read_lock();
+ ice_for_each_vf_rcu(pf, bkt, vf)
+ napi_schedule(&vf->repr->q_vector->napi);
+ rcu_read_unlock();
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ice_vsi_alloc - Allocates the next available struct VSI in the PF
+ * @pf: board private structure
+ * @vsi_type: type of VSI
+ * @ch: ptr to channel
+ * @vf: VF for ICE_VSI_VF and ICE_VSI_CTRL
+ *
+ * The VF pointer is used for ICE_VSI_VF and ICE_VSI_CTRL. For ICE_VSI_CTRL,
+ * it may be NULL in the case there is no association with a VF. For
+ * ICE_VSI_VF the VF pointer *must not* be NULL.
+ *
+ * returns a pointer to a VSI on success, NULL on failure.
+ */
+static struct ice_vsi *
+ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type,
+ struct ice_channel *ch, struct ice_vf *vf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_vsi *vsi = NULL;
+
+ if (WARN_ON(vsi_type == ICE_VSI_VF && !vf))
+ return NULL;
+
+ /* Need to protect the allocation of the VSIs at the PF level */
+ mutex_lock(&pf->sw_mutex);
+
+ /* If we have already allocated our maximum number of VSIs,
+ * pf->next_vsi will be ICE_NO_VSI. If not, pf->next_vsi index
+ * is available to be populated
+ */
+ if (pf->next_vsi == ICE_NO_VSI) {
+ dev_dbg(dev, "out of VSI slots!\n");
+ goto unlock_pf;
+ }
+
+ vsi = devm_kzalloc(dev, sizeof(*vsi), GFP_KERNEL);
+ if (!vsi)
+ goto unlock_pf;
+
+ vsi->type = vsi_type;
+ vsi->back = pf;
+ set_bit(ICE_VSI_DOWN, vsi->state);
+
+ if (vsi_type == ICE_VSI_VF)
+ ice_vsi_set_num_qs(vsi, vf);
+ else if (vsi_type != ICE_VSI_CHNL)
+ ice_vsi_set_num_qs(vsi, NULL);
+
+ switch (vsi->type) {
+ case ICE_VSI_SWITCHDEV_CTRL:
+ if (ice_vsi_alloc_arrays(vsi))
+ goto err_rings;
+
+ /* Setup eswitch MSIX irq handler for VSI */
+ vsi->irq_handler = ice_eswitch_msix_clean_rings;
+ break;
+ case ICE_VSI_PF:
+ if (ice_vsi_alloc_arrays(vsi))
+ goto err_rings;
+
+ /* Setup default MSIX irq handler for VSI */
+ vsi->irq_handler = ice_msix_clean_rings;
+ break;
+ case ICE_VSI_CTRL:
+ if (ice_vsi_alloc_arrays(vsi))
+ goto err_rings;
+
+ /* Setup ctrl VSI MSIX irq handler */
+ vsi->irq_handler = ice_msix_clean_ctrl_vsi;
+
+ /* For the PF control VSI this is NULL, for the VF control VSI
+ * this will be the first VF to allocate it.
+ */
+ vsi->vf = vf;
+ break;
+ case ICE_VSI_VF:
+ if (ice_vsi_alloc_arrays(vsi))
+ goto err_rings;
+ vsi->vf = vf;
+ break;
+ case ICE_VSI_CHNL:
+ if (!ch)
+ goto err_rings;
+ vsi->num_rxq = ch->num_rxq;
+ vsi->num_txq = ch->num_txq;
+ vsi->next_base_q = ch->base_q;
+ break;
+ case ICE_VSI_LB:
+ if (ice_vsi_alloc_arrays(vsi))
+ goto err_rings;
+ break;
+ default:
+ dev_warn(dev, "Unknown VSI type %d\n", vsi->type);
+ goto unlock_pf;
+ }
+
+ if (vsi->type == ICE_VSI_CTRL && !vf) {
+ /* Use the last VSI slot as the index for PF control VSI */
+ vsi->idx = pf->num_alloc_vsi - 1;
+ pf->ctrl_vsi_idx = vsi->idx;
+ pf->vsi[vsi->idx] = vsi;
+ } else {
+ /* fill slot and make note of the index */
+ vsi->idx = pf->next_vsi;
+ pf->vsi[pf->next_vsi] = vsi;
+
+ /* prepare pf->next_vsi for next use */
+ pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
+ pf->next_vsi);
+ }
+
+ if (vsi->type == ICE_VSI_CTRL && vf)
+ vf->ctrl_vsi_idx = vsi->idx;
+ goto unlock_pf;
+
+err_rings:
+ devm_kfree(dev, vsi);
+ vsi = NULL;
+unlock_pf:
+ mutex_unlock(&pf->sw_mutex);
+ return vsi;
+}
+
+/**
+ * ice_alloc_fd_res - Allocate FD resource for a VSI
+ * @vsi: pointer to the ice_vsi
+ *
+ * This allocates the FD resources
+ *
+ * Returns 0 on success, -EPERM if FD resources are unavailable or do not
+ * apply to this VSI type, or -EINVAL on an invalid channel configuration
+ */
+static int ice_alloc_fd_res(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ u32 g_val, b_val;
+
+	/* Flow Director filters are only allocated/assigned to the PF, VF, or
+	 * CHNL VSIs which pass the traffic. The CTRL VSI is only used to
+	 * add/delete filters so resources are not allocated to it.
+ */
+ if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+ return -EPERM;
+
+ if (!(vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF ||
+ vsi->type == ICE_VSI_CHNL))
+ return -EPERM;
+
+ /* FD filters from guaranteed pool per VSI */
+ g_val = pf->hw.func_caps.fd_fltr_guar;
+ if (!g_val)
+ return -EPERM;
+
+ /* FD filters from best effort pool */
+ b_val = pf->hw.func_caps.fd_fltr_best_effort;
+ if (!b_val)
+ return -EPERM;
+
+ /* PF main VSI gets only 64 FD resources from guaranteed pool
+ * when ADQ is configured.
+ */
+#define ICE_PF_VSI_GFLTR 64
+
+ /* determine FD filter resources per VSI from shared(best effort) and
+ * dedicated pool
+ */
+ if (vsi->type == ICE_VSI_PF) {
+ vsi->num_gfltr = g_val;
+ /* if MQPRIO is configured, main VSI doesn't get all FD
+ * resources from guaranteed pool. PF VSI gets 64 FD resources
+ */
+ if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+ if (g_val < ICE_PF_VSI_GFLTR)
+ return -EPERM;
+ /* allow bare minimum entries for PF VSI */
+ vsi->num_gfltr = ICE_PF_VSI_GFLTR;
+ }
+
+ /* each VSI gets same "best_effort" quota */
+ vsi->num_bfltr = b_val;
+ } else if (vsi->type == ICE_VSI_VF) {
+ vsi->num_gfltr = 0;
+
+ /* each VSI gets same "best_effort" quota */
+ vsi->num_bfltr = b_val;
+ } else {
+ struct ice_vsi *main_vsi;
+ int numtc;
+
+ main_vsi = ice_get_main_vsi(pf);
+ if (!main_vsi)
+ return -EPERM;
+
+ if (!main_vsi->all_numtc)
+ return -EINVAL;
+
+ /* figure out ADQ numtc */
+ numtc = main_vsi->all_numtc - ICE_CHNL_START_TC;
+
+		/* only one TC but still asking for channel resources is an
+		 * invalid config
+ */
+ if (numtc < ICE_CHNL_START_TC)
+ return -EPERM;
+
+ g_val -= ICE_PF_VSI_GFLTR;
+		/* channel VSIs get an equal share from the guaranteed pool */
+ vsi->num_gfltr = g_val / numtc;
+
+ /* each VSI gets same "best_effort" quota */
+ vsi->num_bfltr = b_val;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vsi_get_qs - Assign queues from PF to VSI
+ * @vsi: the VSI to assign queues to
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_get_qs(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_qs_cfg tx_qs_cfg = {
+ .qs_mutex = &pf->avail_q_mutex,
+ .pf_map = pf->avail_txqs,
+ .pf_map_size = pf->max_pf_txqs,
+ .q_count = vsi->alloc_txq,
+ .scatter_count = ICE_MAX_SCATTER_TXQS,
+ .vsi_map = vsi->txq_map,
+ .vsi_map_offset = 0,
+ .mapping_mode = ICE_VSI_MAP_CONTIG
+ };
+ struct ice_qs_cfg rx_qs_cfg = {
+ .qs_mutex = &pf->avail_q_mutex,
+ .pf_map = pf->avail_rxqs,
+ .pf_map_size = pf->max_pf_rxqs,
+ .q_count = vsi->alloc_rxq,
+ .scatter_count = ICE_MAX_SCATTER_RXQS,
+ .vsi_map = vsi->rxq_map,
+ .vsi_map_offset = 0,
+ .mapping_mode = ICE_VSI_MAP_CONTIG
+ };
+ int ret;
+
+ if (vsi->type == ICE_VSI_CHNL)
+ return 0;
+
+ ret = __ice_vsi_get_qs(&tx_qs_cfg);
+ if (ret)
+ return ret;
+ vsi->tx_mapping_mode = tx_qs_cfg.mapping_mode;
+
+ ret = __ice_vsi_get_qs(&rx_qs_cfg);
+ if (ret)
+ return ret;
+ vsi->rx_mapping_mode = rx_qs_cfg.mapping_mode;
+
+ return 0;
+}
+
+/**
+ * ice_vsi_put_qs - Release queues from VSI to PF
+ * @vsi: the VSI that is going to release queues
+ */
+static void ice_vsi_put_qs(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int i;
+
+ mutex_lock(&pf->avail_q_mutex);
+
+ ice_for_each_alloc_txq(vsi, i) {
+ clear_bit(vsi->txq_map[i], pf->avail_txqs);
+ vsi->txq_map[i] = ICE_INVAL_Q_INDEX;
+ }
+
+ ice_for_each_alloc_rxq(vsi, i) {
+ clear_bit(vsi->rxq_map[i], pf->avail_rxqs);
+ vsi->rxq_map[i] = ICE_INVAL_Q_INDEX;
+ }
+
+ mutex_unlock(&pf->avail_q_mutex);
+}
+
+/**
+ * ice_is_safe_mode - check if the driver is in safe mode
+ * @pf: pointer to the PF struct
+ *
+ * returns true if driver is in safe mode, false otherwise
+ */
+bool ice_is_safe_mode(struct ice_pf *pf)
+{
+ return !test_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+}
+
+/**
+ * ice_is_rdma_ena - check if RDMA is currently supported
+ * @pf: pointer to the PF struct
+ *
+ * returns true if RDMA is currently supported, false otherwise
+ */
+bool ice_is_rdma_ena(struct ice_pf *pf)
+{
+ return test_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+}
+
+/**
+ * ice_vsi_clean_rss_flow_fld - Delete RSS configuration
+ * @vsi: the VSI being cleaned up
+ *
+ * This function deletes the RSS input set for all flows that were configured
+ * for this VSI
+ */
+static void ice_vsi_clean_rss_flow_fld(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int status;
+
+ if (ice_is_safe_mode(pf))
+ return;
+
+ status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx);
+ if (status)
+ dev_dbg(ice_pf_to_dev(pf), "ice_rem_vsi_rss_cfg failed for vsi = %d, error = %d\n",
+ vsi->vsi_num, status);
+}
+
+/**
+ * ice_rss_clean - Delete RSS related VSI structures and configuration
+ * @vsi: the VSI being removed
+ */
+static void ice_rss_clean(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
+
+ if (vsi->rss_hkey_user)
+ devm_kfree(dev, vsi->rss_hkey_user);
+ if (vsi->rss_lut_user)
+ devm_kfree(dev, vsi->rss_lut_user);
+
+ ice_vsi_clean_rss_flow_fld(vsi);
+ /* remove RSS replay list */
+ if (!ice_is_safe_mode(pf))
+ ice_rem_vsi_rss_list(&pf->hw, vsi->idx);
+}
+
+/**
+ * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
+{
+ struct ice_hw_common_caps *cap;
+ struct ice_pf *pf = vsi->back;
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ vsi->rss_size = 1;
+ return;
+ }
+
+ cap = &pf->hw.func_caps.common_cap;
+ switch (vsi->type) {
+ case ICE_VSI_CHNL:
+ case ICE_VSI_PF:
+ /* PF VSI will inherit RSS instance of PF */
+ vsi->rss_table_size = (u16)cap->rss_table_size;
+ if (vsi->type == ICE_VSI_CHNL)
+ vsi->rss_size = min_t(u16, vsi->num_rxq,
+ BIT(cap->rss_table_entry_width));
+ else
+ vsi->rss_size = min_t(u16, num_online_cpus(),
+ BIT(cap->rss_table_entry_width));
+ vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
+ break;
+ case ICE_VSI_SWITCHDEV_CTRL:
+ vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
+ vsi->rss_size = min_t(u16, num_online_cpus(),
+ BIT(cap->rss_table_entry_width));
+ vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
+ break;
+ case ICE_VSI_VF:
+ /* VF VSI will get a small RSS table.
+ * For VSI_LUT, LUT size should be set to 64 bytes.
+ */
+ vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
+ vsi->rss_size = ICE_MAX_RSS_QS_PER_VF;
+ vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
+ break;
+ case ICE_VSI_LB:
+ break;
+ default:
+ dev_dbg(ice_pf_to_dev(pf), "Unsupported VSI type %s\n",
+ ice_vsi_type_str(vsi->type));
+ break;
+ }
+}
+
+/**
+ * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI
+ * @hw: HW structure used to determine the VLAN mode of the device
+ * @ctxt: the VSI context being set
+ *
+ * This initializes a default VSI context for all sections except the Queues.
+ */
+static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt)
+{
+ u32 table = 0;
+
+ memset(&ctxt->info, 0, sizeof(ctxt->info));
+	/* VSIs should be allocated from the shared pool */
+ ctxt->alloc_from_pool = true;
+ /* Src pruning enabled by default */
+ ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
+ /* Traffic from VSI can be sent to LAN */
+ ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
+ /* allow all untagged/tagged packets by default on Tx */
+ ctxt->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
+ ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
+ ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
+ /* SVM - by default bits 3 and 4 in inner_vlan_flags are 0's which
+ * results in legacy behavior (show VLAN, DEI, and UP) in descriptor.
+ *
+ * DVM - leave inner VLAN in packet by default
+ */
+ if (ice_is_dvm_ena(hw)) {
+ ctxt->info.inner_vlan_flags |=
+ ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
+ ctxt->info.outer_vlan_flags =
+ (ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL <<
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) &
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M;
+ ctxt->info.outer_vlan_flags |=
+ (ICE_AQ_VSI_OUTER_TAG_VLAN_8100 <<
+ ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+ ICE_AQ_VSI_OUTER_TAG_TYPE_M;
+ ctxt->info.outer_vlan_flags |=
+ FIELD_PREP(ICE_AQ_VSI_OUTER_VLAN_EMODE_M,
+ ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING);
+ }
+ /* Have 1:1 UP mapping for both ingress/egress tables */
+ table |= ICE_UP_TABLE_TRANSLATE(0, 0);
+ table |= ICE_UP_TABLE_TRANSLATE(1, 1);
+ table |= ICE_UP_TABLE_TRANSLATE(2, 2);
+ table |= ICE_UP_TABLE_TRANSLATE(3, 3);
+ table |= ICE_UP_TABLE_TRANSLATE(4, 4);
+ table |= ICE_UP_TABLE_TRANSLATE(5, 5);
+ table |= ICE_UP_TABLE_TRANSLATE(6, 6);
+ table |= ICE_UP_TABLE_TRANSLATE(7, 7);
+ ctxt->info.ingress_table = cpu_to_le32(table);
+ ctxt->info.egress_table = cpu_to_le32(table);
+ /* Have 1:1 UP mapping for outer to inner UP table */
+ ctxt->info.outer_up_table = cpu_to_le32(table);
+	/* No outer tag support; outer_tag_flags remains zero */
+}
+
+/**
+ * ice_vsi_setup_q_map - Setup a VSI queue map
+ * @vsi: the VSI being configured
+ * @ctxt: VSI context structure
+ */
+static int ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+{
+ u16 offset = 0, qmap = 0, tx_count = 0, rx_count = 0, pow = 0;
+ u16 num_txq_per_tc, num_rxq_per_tc;
+ u16 qcount_tx = vsi->alloc_txq;
+ u16 qcount_rx = vsi->alloc_rxq;
+ u8 netdev_tc = 0;
+ int i;
+
+ if (!vsi->tc_cfg.numtc) {
+ /* at least TC0 should be enabled by default */
+ vsi->tc_cfg.numtc = 1;
+ vsi->tc_cfg.ena_tc = 1;
+ }
+
+ num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC);
+ if (!num_rxq_per_tc)
+ num_rxq_per_tc = 1;
+ num_txq_per_tc = qcount_tx / vsi->tc_cfg.numtc;
+ if (!num_txq_per_tc)
+ num_txq_per_tc = 1;
+
+ /* find the (rounded up) power-of-2 of qcount */
+ pow = (u16)order_base_2(num_rxq_per_tc);
+
+ /* TC mapping is a function of the number of Rx queues assigned to the
+ * VSI for each traffic class and the offset of these queues.
+	 * The first 10 bits are the queue offset for TC0, and the next 4 bits
+	 * are the number of queues allocated to TC0. The number of queues is
+	 * a power of 2.
+	 *
+	 * If a TC is not enabled, its queue offset is set to 0 and it is
+	 * allocated one queue; this way traffic for the given TC will be sent
+	 * to the default queue.
+	 *
+	 * Set up the number and offset of Rx queues for all TCs for the VSI.
+ */
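+	/* Example (hypothetical numbers): a single enabled TC with
+	 * num_rxq_per_tc = 4 gives pow = 2, so TC0's qmap encodes queue
+	 * offset 0 with 2^2 = 4 queues.
+	 */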
+ ice_for_each_traffic_class(i) {
+ if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
+ /* TC is not enabled */
+ vsi->tc_cfg.tc_info[i].qoffset = 0;
+ vsi->tc_cfg.tc_info[i].qcount_rx = 1;
+ vsi->tc_cfg.tc_info[i].qcount_tx = 1;
+ vsi->tc_cfg.tc_info[i].netdev_tc = 0;
+ ctxt->info.tc_mapping[i] = 0;
+ continue;
+ }
+
+ /* TC is enabled */
+ vsi->tc_cfg.tc_info[i].qoffset = offset;
+ vsi->tc_cfg.tc_info[i].qcount_rx = num_rxq_per_tc;
+ vsi->tc_cfg.tc_info[i].qcount_tx = num_txq_per_tc;
+ vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
+
+ qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+ ICE_AQ_VSI_TC_Q_OFFSET_M) |
+ ((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
+ ICE_AQ_VSI_TC_Q_NUM_M);
+ offset += num_rxq_per_tc;
+ tx_count += num_txq_per_tc;
+ ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
+ }
+
+	/* If offset is non-zero, it was calculated correctly from the enabled
+	 * TCs of this VSI; otherwise fall back to num_rxq_per_tc, which is
+	 * always correct and non-zero because it is derived from the VSI's
+	 * allocated Rx queues, which is at least 1 (hence tx_count will also
+	 * be at least 1).
+ */
+ if (offset)
+ rx_count = offset;
+ else
+ rx_count = num_rxq_per_tc;
+
+ if (rx_count > vsi->alloc_rxq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n",
+ rx_count, vsi->alloc_rxq);
+ return -EINVAL;
+ }
+
+ if (tx_count > vsi->alloc_txq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n",
+ tx_count, vsi->alloc_txq);
+ return -EINVAL;
+ }
+
+ vsi->num_txq = tx_count;
+ vsi->num_rxq = rx_count;
+
+ if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) {
+ dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n");
+ /* since there is a chance that num_rxq could have been changed
+ * in the above for loop, make num_txq equal to num_rxq.
+ */
+ vsi->num_txq = vsi->num_rxq;
+ }
+
+ /* Rx queue mapping */
+ ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
+ /* q_mapping buffer holds the info for the first queue allocated for
+ * this VSI in the PF space and also the number of queues associated
+ * with this VSI.
+ */
+ ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
+ ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
+
+ return 0;
+}
+
+/**
+ * ice_set_fd_vsi_ctx - Set FD VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ * @vsi: the VSI being configured
+ */
+static void ice_set_fd_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
+{
+ u8 dflt_q_group, dflt_q_prio;
+ u16 dflt_q, report_q, val;
+
+ if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL &&
+ vsi->type != ICE_VSI_VF && vsi->type != ICE_VSI_CHNL)
+ return;
+
+ val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID;
+ ctxt->info.valid_sections |= cpu_to_le16(val);
+ dflt_q = 0;
+ dflt_q_group = 0;
+ report_q = 0;
+ dflt_q_prio = 0;
+
+ /* enable flow director filtering/programming */
+ val = ICE_AQ_VSI_FD_ENABLE | ICE_AQ_VSI_FD_PROG_ENABLE;
+ ctxt->info.fd_options = cpu_to_le16(val);
+ /* max of allocated flow director filters */
+ ctxt->info.max_fd_fltr_dedicated =
+ cpu_to_le16(vsi->num_gfltr);
+ /* max of shared flow director filters any VSI may program */
+ ctxt->info.max_fd_fltr_shared =
+ cpu_to_le16(vsi->num_bfltr);
+ /* default queue index within the VSI of the default FD */
+ val = ((dflt_q << ICE_AQ_VSI_FD_DEF_Q_S) &
+ ICE_AQ_VSI_FD_DEF_Q_M);
+ /* target queue or queue group to the FD filter */
+ val |= ((dflt_q_group << ICE_AQ_VSI_FD_DEF_GRP_S) &
+ ICE_AQ_VSI_FD_DEF_GRP_M);
+ ctxt->info.fd_def_q = cpu_to_le16(val);
+ /* queue index on which FD filter completion is reported */
+ val = ((report_q << ICE_AQ_VSI_FD_REPORT_Q_S) &
+ ICE_AQ_VSI_FD_REPORT_Q_M);
+ /* priority of the default qindex action */
+ val |= ((dflt_q_prio << ICE_AQ_VSI_FD_DEF_PRIORITY_S) &
+ ICE_AQ_VSI_FD_DEF_PRIORITY_M);
+ ctxt->info.fd_report_opt = cpu_to_le16(val);
+}
+
+/**
+ * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ * @vsi: the VSI being configured
+ */
+static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
+{
+ u8 lut_type, hash_type;
+ struct device *dev;
+ struct ice_pf *pf;
+
+ pf = vsi->back;
+ dev = ice_pf_to_dev(pf);
+
+ switch (vsi->type) {
+ case ICE_VSI_CHNL:
+ case ICE_VSI_PF:
+ /* PF VSI will inherit RSS instance of PF */
+ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
+ hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+ break;
+ case ICE_VSI_VF:
+		/* VF VSI will get a small RSS table which is a VSI LUT type */
+ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
+ hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+ break;
+ default:
+ dev_dbg(dev, "Unsupported VSI type %s\n",
+ ice_vsi_type_str(vsi->type));
+ return;
+ }
+
+ ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
+ ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
+ (hash_type & ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
+}
+
+static void
+ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+{
+ struct ice_pf *pf = vsi->back;
+ u16 qcount, qmap;
+ u8 offset = 0;
+ int pow;
+
+ qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix);
+
+ pow = order_base_2(qcount);
+ qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+ ICE_AQ_VSI_TC_Q_OFFSET_M) |
+ ((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
+ ICE_AQ_VSI_TC_Q_NUM_M);
+
+ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+ ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
+ ctxt->info.q_mapping[0] = cpu_to_le16(vsi->next_base_q);
+ ctxt->info.q_mapping[1] = cpu_to_le16(qcount);
+}
+
+/**
+ * ice_vsi_init - Create and initialize a VSI
+ * @vsi: the VSI being configured
+ * @init_vsi: is this call creating a VSI
+ *
+ * This initializes a VSI context depending on the VSI type to be added and
+ * passes it down to the add_vsi aq command to create a new VSI.
+ */
+static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_vsi_ctx *ctxt;
+ struct device *dev;
+ int ret = 0;
+
+ dev = ice_pf_to_dev(pf);
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ switch (vsi->type) {
+ case ICE_VSI_CTRL:
+ case ICE_VSI_LB:
+ case ICE_VSI_PF:
+ ctxt->flags = ICE_AQ_VSI_TYPE_PF;
+ break;
+ case ICE_VSI_SWITCHDEV_CTRL:
+ case ICE_VSI_CHNL:
+ ctxt->flags = ICE_AQ_VSI_TYPE_VMDQ2;
+ break;
+ case ICE_VSI_VF:
+ ctxt->flags = ICE_AQ_VSI_TYPE_VF;
+ /* VF number here is the absolute VF number (0-255) */
+ ctxt->vf_num = vsi->vf->vf_id + hw->func_caps.vf_base_id;
+ break;
+ default:
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /* Handle VLAN pruning for channel VSI if main VSI has VLAN
+ * prune enabled
+ */
+ if (vsi->type == ICE_VSI_CHNL) {
+ struct ice_vsi *main_vsi;
+
+ main_vsi = ice_get_main_vsi(pf);
+ if (main_vsi && ice_vsi_is_vlan_pruning_ena(main_vsi))
+ ctxt->info.sw_flags2 |=
+ ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+ else
+ ctxt->info.sw_flags2 &=
+ ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+ }
+
+ ice_set_dflt_vsi_ctx(hw, ctxt);
+ if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
+ ice_set_fd_vsi_ctx(ctxt, vsi);
+ /* if the switch is in VEB mode, allow VSI loopback */
+ if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB)
+ ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+
+ /* Set LUT type and HASH type if RSS is enabled */
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags) &&
+ vsi->type != ICE_VSI_CTRL) {
+ ice_set_rss_vsi_ctx(ctxt, vsi);
+		/* if updating the VSI context, make sure to set valid_sections
+		 * to indicate which sections of the VSI context are being
+		 * updated
+ */
+ if (!init_vsi)
+ ctxt->info.valid_sections |=
+ cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+ }
+
+ ctxt->info.sw_id = vsi->port_info->sw_id;
+ if (vsi->type == ICE_VSI_CHNL) {
+ ice_chnl_vsi_setup_q_map(vsi, ctxt);
+ } else {
+ ret = ice_vsi_setup_q_map(vsi, ctxt);
+ if (ret)
+ goto out;
+
+ if (!init_vsi) /* means VSI being updated */
+			/* must indicate which sections of the VSI context are
+			 * being modified
+ */
+ ctxt->info.valid_sections |=
+ cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+ }
+
+ /* Allow control frames out of main VSI */
+ if (vsi->type == ICE_VSI_PF) {
+ ctxt->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
+ ctxt->info.valid_sections |=
+ cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+ }
+
+ if (init_vsi) {
+ ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL);
+ if (ret) {
+ dev_err(dev, "Add VSI failed, err %d\n", ret);
+ ret = -EIO;
+ goto out;
+ }
+ } else {
+ ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (ret) {
+ dev_err(dev, "Update VSI failed, err %d\n", ret);
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ /* keep context for update VSI operations */
+ vsi->info = ctxt->info;
+
+ /* record VSI number returned */
+ vsi->vsi_num = ctxt->vsi_num;
+
+out:
+ kfree(ctxt);
+ return ret;
+}
+
+/**
+ * ice_free_res - free a block of resources
+ * @res: pointer to the resource
+ * @index: starting index previously returned by ice_get_res
+ * @id: identifier to track owner
+ *
+ * Returns the number of resources freed, or -EINVAL on invalid parameters
+ */
+int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
+{
+ int count = 0;
+ int i;
+
+ if (!res || index >= res->end)
+ return -EINVAL;
+
+ id |= ICE_RES_VALID_BIT;
+ for (i = index; i < res->end && res->list[i] == id; i++) {
+ res->list[i] = 0;
+ count++;
+ }
+
+ return count;
+}
+
+/**
+ * ice_search_res - Search the tracker for a block of resources
+ * @res: pointer to the resource
+ * @needed: size of the block needed
+ * @id: identifier to track owner
+ *
+ * Returns the base item index of the block, or -ENOMEM for error
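+ *
+ * Example (sketch): with res->end = 7 and entries 0 and 3 already owned,
+ * a request for needed = 3 skips past the allocated entries, assigns
+ * entries 4-6 to @id and returns base index 4.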
+ */
+static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
+{
+ u16 start = 0, end = 0;
+
+ if (needed > res->end)
+ return -ENOMEM;
+
+ id |= ICE_RES_VALID_BIT;
+
+ do {
+ /* skip already allocated entries */
+ if (res->list[end++] & ICE_RES_VALID_BIT) {
+ start = end;
+ if ((start + needed) > res->end)
+ break;
+ }
+
+ if (end == (start + needed)) {
+ int i = start;
+
+ /* there was enough, so assign it to the requestor */
+ while (i != end)
+ res->list[i++] = id;
+
+ return start;
+ }
+ } while (end < res->end);
+
+ return -ENOMEM;
+}
+
+/**
+ * ice_get_free_res_count - Get free count from a resource tracker
+ * @res: Resource tracker instance
+ */
+static u16 ice_get_free_res_count(struct ice_res_tracker *res)
+{
+ u16 i, count = 0;
+
+ for (i = 0; i < res->end; i++)
+ if (!(res->list[i] & ICE_RES_VALID_BIT))
+ count++;
+
+ return count;
+}
+
+/**
+ * ice_get_res - get a block of resources
+ * @pf: board private structure
+ * @res: pointer to the resource
+ * @needed: size of the block needed
+ * @id: identifier to track owner
+ *
+ * Returns the base item index of the block, or negative for error
+ */
+int
+ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
+{
+ if (!res || !pf)
+ return -EINVAL;
+
+ if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
+ dev_err(ice_pf_to_dev(pf), "param err: needed=%d, num_entries = %d id=0x%04x\n",
+ needed, res->num_entries, id);
+ return -EINVAL;
+ }
+
+ return ice_search_res(res, needed, id);
+}
+
+/**
+ * ice_get_vf_ctrl_res - Get VF control VSI resource
+ * @pf: pointer to the PF structure
+ * @vsi: the VSI to allocate a resource for
+ *
+ * Look up whether another VF has already allocated the control VSI resource.
+ * If so, re-use this resource so that we share it among all VFs.
+ *
+ * Otherwise, allocate the resource and return it.
+ */
+static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+ int base;
+
+ rcu_read_lock();
+ ice_for_each_vf_rcu(pf, bkt, vf) {
+ if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
+ base = pf->vsi[vf->ctrl_vsi_idx]->base_vector;
+ rcu_read_unlock();
+ return base;
+ }
+ }
+ rcu_read_unlock();
+
+ return ice_get_res(pf, pf->irq_tracker, vsi->num_q_vectors,
+ ICE_RES_VF_CTRL_VEC_ID);
+}
+
+/**
+ * ice_vsi_setup_vector_base - Set up the base vector for the given VSI
+ * @vsi: ptr to the VSI
+ *
+ * This should only be called after ice_vsi_alloc() which allocates the
+ * corresponding SW VSI structure and initializes num_queue_pairs for the
+ * newly allocated VSI.
+ *
+ * Returns 0 on success or negative on failure
+ */
+static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ u16 num_q_vectors;
+ int base;
+
+ dev = ice_pf_to_dev(pf);
+ /* SRIOV doesn't grab irq_tracker entries for each VSI */
+ if (vsi->type == ICE_VSI_VF)
+ return 0;
+ if (vsi->type == ICE_VSI_CHNL)
+ return 0;
+
+ if (vsi->base_vector) {
+ dev_dbg(dev, "VSI %d has non-zero base vector %d\n",
+ vsi->vsi_num, vsi->base_vector);
+ return -EEXIST;
+ }
+
+ num_q_vectors = vsi->num_q_vectors;
+ /* reserve slots from OS requested IRQs */
+ if (vsi->type == ICE_VSI_CTRL && vsi->vf) {
+ base = ice_get_vf_ctrl_res(pf, vsi);
+ } else {
+ base = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
+ vsi->idx);
+ }
+
+ if (base < 0) {
+ dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n",
+ ice_get_free_res_count(pf->irq_tracker),
+ ice_vsi_type_str(vsi->type), vsi->idx, num_q_vectors);
+ return -ENOENT;
+ }
+ vsi->base_vector = (u16)base;
+ pf->num_avail_sw_msix -= num_q_vectors;
+
+ return 0;
+}
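+
+/* Note: the tracker reservation above and the pf->num_avail_sw_msix
+ * accounting must stay in sync; every path that calls ice_free_res() on
+ * the irq_tracker (see ice_vsi_release() and ice_vsi_rebuild() below)
+ * also adds vsi->num_q_vectors back to num_avail_sw_msix.
+ */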
+
+/**
+ * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI
+ * @vsi: the VSI having rings deallocated
+ */
+static void ice_vsi_clear_rings(struct ice_vsi *vsi)
+{
+ int i;
+
+ /* Avoid stale references by clearing map from vector to ring */
+ if (vsi->q_vectors) {
+ ice_for_each_q_vector(vsi, i) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+ if (q_vector) {
+ q_vector->tx.tx_ring = NULL;
+ q_vector->rx.rx_ring = NULL;
+ }
+ }
+ }
+
+ if (vsi->tx_rings) {
+ ice_for_each_alloc_txq(vsi, i) {
+ if (vsi->tx_rings[i]) {
+ kfree_rcu(vsi->tx_rings[i], rcu);
+ WRITE_ONCE(vsi->tx_rings[i], NULL);
+ }
+ }
+ }
+ if (vsi->rx_rings) {
+ ice_for_each_alloc_rxq(vsi, i) {
+ if (vsi->rx_rings[i]) {
+ kfree_rcu(vsi->rx_rings[i], rcu);
+ WRITE_ONCE(vsi->rx_rings[i], NULL);
+ }
+ }
+ }
+}
+
+/**
+ * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI
+ * @vsi: VSI which is having rings allocated
+ */
+static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
+{
+ bool dvm_ena = ice_is_dvm_ena(&vsi->back->hw);
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ u16 i;
+
+ dev = ice_pf_to_dev(pf);
+ /* Allocate Tx rings */
+ ice_for_each_alloc_txq(vsi, i) {
+ struct ice_tx_ring *ring;
+
+ /* allocate with kzalloc(), free with kfree_rcu() */
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+
+ if (!ring)
+ goto err_out;
+
+ ring->q_index = i;
+ ring->reg_idx = vsi->txq_map[i];
+ ring->vsi = vsi;
+ ring->tx_tstamps = &pf->ptp.port.tx;
+ ring->dev = dev;
+ ring->count = vsi->num_tx_desc;
+ ring->txq_teid = ICE_INVAL_TEID;
+ if (dvm_ena)
+ ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG2;
+ else
+ ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG1;
+ WRITE_ONCE(vsi->tx_rings[i], ring);
+ }
+
+ /* Allocate Rx rings */
+ ice_for_each_alloc_rxq(vsi, i) {
+ struct ice_rx_ring *ring;
+
+ /* allocate with kzalloc(), free with kfree_rcu() */
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring)
+ goto err_out;
+
+ ring->q_index = i;
+ ring->reg_idx = vsi->rxq_map[i];
+ ring->vsi = vsi;
+ ring->netdev = vsi->netdev;
+ ring->dev = dev;
+ ring->count = vsi->num_rx_desc;
+ ring->cached_phctime = pf->ptp.cached_phc_time;
+ WRITE_ONCE(vsi->rx_rings[i], ring);
+ }
+
+ return 0;
+
+err_out:
+ ice_vsi_clear_rings(vsi);
+ return -ENOMEM;
+}
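+
+/* The kzalloc()/kfree_rcu() pairing above matters: readers in the hot path
+ * may still hold a ring pointer under RCU, so rings are published with
+ * WRITE_ONCE() here and reclaimed via kfree_rcu() in ice_vsi_clear_rings()
+ * only after a grace period has elapsed.
+ */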
+
+/**
+ * ice_vsi_manage_rss_lut - disable/enable RSS
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ *
+ * On a disable request this function zeroes out the RSS LUT, while on an
+ * enable request it reconfigures the RSS LUT.
+ */
+void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
+{
+ u8 *lut;
+
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return;
+
+ if (ena) {
+ if (vsi->rss_lut_user)
+ memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+ else
+ ice_fill_rss_lut(lut, vsi->rss_table_size,
+ vsi->rss_size);
+ }
+
+ ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
+ kfree(lut);
+}
+
+/**
+ * ice_vsi_cfg_crc_strip - Configure CRC stripping for a VSI
+ * @vsi: VSI to be configured
+ * @disable: set to true to have FCS / CRC in the frame data
+ */
+void ice_vsi_cfg_crc_strip(struct ice_vsi *vsi, bool disable)
+{
+ int i;
+
+ ice_for_each_rxq(vsi, i)
+ if (disable)
+ vsi->rx_rings[i]->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS;
+ else
+ vsi->rx_rings[i]->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS;
+}
+
+/**
+ * ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI
+ * @vsi: VSI to be configured
+ */
+int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ u8 *lut, *key;
+ int err;
+
+ dev = ice_pf_to_dev(pf);
+ if (vsi->type == ICE_VSI_PF && vsi->ch_rss_size &&
+ (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))) {
+ vsi->rss_size = min_t(u16, vsi->rss_size, vsi->ch_rss_size);
+ } else {
+ vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq);
+
+ /* If orig_rss_size is valid and less than the determined
+ * main VSI's rss_size, update the main VSI's rss_size to
+ * orig_rss_size so that when the tc-qdisc is deleted, the
+ * main VSI RSS table gets programmed back to whatever it
+ * was before setup-tc for the ADQ config.
+ */
+ if (vsi->orig_rss_size && vsi->rss_size < vsi->orig_rss_size &&
+ vsi->orig_rss_size <= vsi->num_rxq) {
+ vsi->rss_size = vsi->orig_rss_size;
+ /* now orig_rss_size is used, reset it to zero */
+ vsi->orig_rss_size = 0;
+ }
+ }
+
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ if (vsi->rss_lut_user)
+ memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+ else
+ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+
+ err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
+ if (err) {
+ dev_err(dev, "set_rss_lut failed, error %d\n", err);
+ goto ice_vsi_cfg_rss_exit;
+ }
+
+ key = kzalloc(ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE, GFP_KERNEL);
+ if (!key) {
+ err = -ENOMEM;
+ goto ice_vsi_cfg_rss_exit;
+ }
+
+ if (vsi->rss_hkey_user)
+ memcpy(key, vsi->rss_hkey_user, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
+ else
+ netdev_rss_key_fill((void *)key, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
+
+ err = ice_set_rss_key(vsi, key);
+ if (err)
+ dev_err(dev, "set_rss_key failed, error %d\n", err);
+
+ kfree(key);
+ice_vsi_cfg_rss_exit:
+ kfree(lut);
+ return err;
+}
+
+/**
+ * ice_vsi_set_vf_rss_flow_fld - Sets VF VSI RSS input set for different flows
+ * @vsi: VSI to be configured
+ *
+ * This function will only be called during the VF VSI setup. Upon successful
+ * completion of package download, this function will configure default RSS
+ * input sets for VF VSI.
+ */
+static void ice_vsi_set_vf_rss_flow_fld(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ int status;
+
+ dev = ice_pf_to_dev(pf);
+ if (ice_is_safe_mode(pf)) {
+ dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+ vsi->vsi_num);
+ return;
+ }
+
+ status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, ICE_DEFAULT_RSS_HENA);
+ if (status)
+ dev_dbg(dev, "ice_add_avf_rss_cfg failed for vsi = %d, error = %d\n",
+ vsi->vsi_num, status);
+}
+
+/**
+ * ice_vsi_set_rss_flow_fld - Sets RSS input set for different flows
+ * @vsi: VSI to be configured
+ *
+ * This function will only be called after successful download package call
+ * during initialization of PF. Since the downloaded package will erase the
+ * RSS section, this function will configure RSS input sets for different
+ * flow types. The last profile added has the highest priority, therefore
+ * 2-tuple profiles (i.e. IPv4 src/dst) are added before 4-tuple profiles
+ * (i.e. IPv4 src/dst + TCP src/dst port).
+ */
+static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
+{
+ u16 vsi_handle = vsi->idx, vsi_num = vsi->vsi_num;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ struct device *dev;
+ int status;
+
+ dev = ice_pf_to_dev(pf);
+ if (ice_is_safe_mode(pf)) {
+ dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+ vsi_num);
+ return;
+ }
+ /* configure RSS for IPv4 with input set IP src/dst */
+ status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4,
+ ICE_FLOW_SEG_HDR_IPV4);
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for ipv4 flow, vsi = %d, error = %d\n",
+ vsi_num, status);
+
+ /* configure RSS for IPv6 with input set IPv6 src/dst */
+ status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6,
+ ICE_FLOW_SEG_HDR_IPV6);
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for ipv6 flow, vsi = %d, error = %d\n",
+ vsi_num, status);
+
+ /* configure RSS for tcp4 with input set IP src/dst, TCP src/dst */
+ status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV4,
+ ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4);
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for tcp4 flow, vsi = %d, error = %d\n",
+ vsi_num, status);
+
+ /* configure RSS for udp4 with input set IP src/dst, UDP src/dst */
+ status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV4,
+ ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4);
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for udp4 flow, vsi = %d, error = %d\n",
+ vsi_num, status);
+
+ /* configure RSS for sctp4 with input set IP src/dst */
+ status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4,
+ ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4);
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for sctp4 flow, vsi = %d, error = %d\n",
+ vsi_num, status);
+
+ /* configure RSS for tcp6 with input set IPv6 src/dst, TCP src/dst */
+ status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV6,
+ ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6);
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for tcp6 flow, vsi = %d, error = %d\n",
+ vsi_num, status);
+
+ /* configure RSS for udp6 with input set IPv6 src/dst, UDP src/dst */
+ status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV6,
+ ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6);
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for udp6 flow, vsi = %d, error = %d\n",
+ vsi_num, status);
+
+ /* configure RSS for sctp6 with input set IPv6 src/dst */
+ status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6,
+ ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6);
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %d\n",
+ vsi_num, status);
+
+ status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_ESP_SPI,
+ ICE_FLOW_SEG_HDR_ESP);
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for esp/spi flow, vsi = %d, error = %d\n",
+ vsi_num, status);
+}
+
+/**
+ * ice_pf_state_is_nominal - checks the PF for nominal state
+ * @pf: pointer to PF to check
+ *
+ * Check the PF's state for a collection of bits that would indicate
+ * the PF is in a state that would inhibit normal operation for
+ * driver functionality.
+ *
+ * Returns true if PF is in a nominal state, false otherwise
+ */
+bool ice_pf_state_is_nominal(struct ice_pf *pf)
+{
+ DECLARE_BITMAP(check_bits, ICE_STATE_NBITS) = { 0 };
+
+ if (!pf)
+ return false;
+
+ bitmap_set(check_bits, 0, ICE_STATE_NOMINAL_CHECK_BITS);
+ if (bitmap_intersects(pf->state, check_bits, ICE_STATE_NBITS))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_update_eth_stats - Update VSI-specific ethernet statistics counters
+ * @vsi: the VSI to be updated
+ */
+void ice_update_eth_stats(struct ice_vsi *vsi)
+{
+ struct ice_eth_stats *prev_es, *cur_es;
+ struct ice_hw *hw = &vsi->back->hw;
+ u16 vsi_num = vsi->vsi_num; /* HW absolute index of a VSI */
+
+ prev_es = &vsi->eth_stats_prev;
+ cur_es = &vsi->eth_stats;
+
+ ice_stat_update40(hw, GLV_GORCL(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->rx_bytes, &cur_es->rx_bytes);
+
+ ice_stat_update40(hw, GLV_UPRCL(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->rx_unicast, &cur_es->rx_unicast);
+
+ ice_stat_update40(hw, GLV_MPRCL(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->rx_multicast, &cur_es->rx_multicast);
+
+ ice_stat_update40(hw, GLV_BPRCL(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->rx_broadcast, &cur_es->rx_broadcast);
+
+ ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->rx_discards, &cur_es->rx_discards);
+
+ ice_stat_update40(hw, GLV_GOTCL(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->tx_bytes, &cur_es->tx_bytes);
+
+ ice_stat_update40(hw, GLV_UPTCL(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->tx_unicast, &cur_es->tx_unicast);
+
+ ice_stat_update40(hw, GLV_MPTCL(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->tx_multicast, &cur_es->tx_multicast);
+
+ ice_stat_update40(hw, GLV_BPTCL(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->tx_broadcast, &cur_es->tx_broadcast);
+
+ ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded,
+ &prev_es->tx_errors, &cur_es->tx_errors);
+
+ vsi->stat_offsets_loaded = true;
+}
+
+/**
+ * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
+ * @vsi: VSI
+ */
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
+{
+ if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
+ vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+ vsi->rx_buf_len = ICE_RXBUF_2048;
+#if (PAGE_SIZE < 8192)
+ } else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
+ (vsi->netdev->mtu <= ETH_DATA_LEN)) {
+ vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
+ vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
+#endif
+ } else {
+ vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+#if (PAGE_SIZE < 8192)
+ vsi->rx_buf_len = ICE_RXBUF_3072;
+#else
+ vsi->rx_buf_len = ICE_RXBUF_2048;
+#endif
+ }
+}
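+
+/* To summarize the selection above: legacy-rx (or no netdev) always uses a
+ * 2048-byte buffer with the max frame size at the HW maximum; on systems
+ * with pages smaller than 8K, an MTU of at most ETH_DATA_LEN can use a
+ * 1536-byte buffer (minus NET_IP_ALIGN) when padding allows, and 3072
+ * bytes otherwise; on larger pages everything else uses 2048 bytes.
+ */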
+
+/**
+ * ice_write_qrxflxp_cntxt - write/configure QRXFLXP_CNTXT register
+ * @hw: HW pointer
+ * @pf_q: index of the Rx queue in the PF's queue space
+ * @rxdid: flexible descriptor RXDID
+ * @prio: priority for the RXDID for this queue
+ * @ena_ts: true to enable timestamp and false to disable timestamp
+ */
+void
+ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
+ bool ena_ts)
+{
+ int regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+
+ /* clear any previous values */
+ regval &= ~(QRXFLXP_CNTXT_RXDID_IDX_M |
+ QRXFLXP_CNTXT_RXDID_PRIO_M |
+ QRXFLXP_CNTXT_TS_M);
+
+ regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+ QRXFLXP_CNTXT_RXDID_IDX_M;
+
+ regval |= (prio << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+ QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+ if (ena_ts)
+ /* Enable TimeSync on this queue */
+ regval |= QRXFLXP_CNTXT_TS_M;
+
+ wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+}
+
+int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)
+{
+ if (q_idx >= vsi->num_rxq)
+ return -EINVAL;
+
+ return ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]);
+}
+
+int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx)
+{
+ struct ice_aqc_add_tx_qgrp *qg_buf;
+ int err;
+
+ if (q_idx >= vsi->alloc_txq || !tx_rings || !tx_rings[q_idx])
+ return -EINVAL;
+
+ qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL);
+ if (!qg_buf)
+ return -ENOMEM;
+
+ qg_buf->num_txqs = 1;
+
+ err = ice_vsi_cfg_txq(vsi, tx_rings[q_idx], qg_buf);
+ kfree(qg_buf);
+ return err;
+}
+
+/**
+ * ice_vsi_cfg_rxqs - Configure the VSI for Rx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Rx VSI for operation.
+ */
+int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
+{
+ u16 i;
+
+ if (vsi->type == ICE_VSI_VF)
+ goto setup_rings;
+
+ ice_vsi_cfg_frame_size(vsi);
+setup_rings:
+ /* set up individual rings */
+ ice_for_each_rxq(vsi, i) {
+ int err = ice_vsi_cfg_rxq(vsi->rx_rings[i]);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vsi_cfg_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ * @rings: Tx ring array to be configured
+ * @count: number of Tx ring array elements
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+static int
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_tx_ring **rings, u16 count)
+{
+ struct ice_aqc_add_tx_qgrp *qg_buf;
+ u16 q_idx = 0;
+ int err = 0;
+
+ qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL);
+ if (!qg_buf)
+ return -ENOMEM;
+
+ qg_buf->num_txqs = 1;
+
+ for (q_idx = 0; q_idx < count; q_idx++) {
+ err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+ if (err)
+ goto err_cfg_txqs;
+ }
+
+err_cfg_txqs:
+ kfree(qg_buf);
+ return err;
+}
+
+/**
+ * ice_vsi_cfg_lan_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
+{
+ return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
+}
+
+/**
+ * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx queues dedicated for XDP in given VSI for operation.
+ */
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
+{
+ int ret;
+ int i;
+
+ ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
+ if (ret)
+ return ret;
+
+ ice_for_each_rxq(vsi, i)
+ ice_tx_xsk_pool(vsi, i);
+
+ return ret;
+}
+
+/**
+ * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
+ * @intrl: interrupt rate limit in usecs
+ * @gran: interrupt rate limit granularity in usecs
+ *
+ * This function converts a decimal interrupt rate limit in usecs to the format
+ * expected by firmware.
+ */
+static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
+{
+ u32 val = intrl / gran;
+
+ if (val)
+ return val | GLINT_RATE_INTRL_ENA_M;
+ return 0;
+}
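+
+/* Worked example, assuming a 4 usec granularity (ICE_INTRL_GRAN_ABOVE_25):
+ * intrl = 4 gives val = 4 / 4 = 1, so the register value is
+ * 1 | GLINT_RATE_INTRL_ENA_M, i.e. rate limiting enabled at one 4 usec
+ * interval (~250,000 ints/sec); intrl = 0 leaves the limit disabled.
+ */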
+
+/**
+ * ice_write_intrl - write throttle rate limit to interrupt specific register
+ * @q_vector: pointer to interrupt specific structure
+ * @intrl: throttle rate limit in microseconds to write
+ */
+void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl)
+{
+ struct ice_hw *hw = &q_vector->vsi->back->hw;
+
+ wr32(hw, GLINT_RATE(q_vector->reg_idx),
+ ice_intrl_usec_to_reg(intrl, ICE_INTRL_GRAN_ABOVE_25));
+}
+
+static struct ice_q_vector *ice_pull_qvec_from_rc(struct ice_ring_container *rc)
+{
+ switch (rc->type) {
+ case ICE_RX_CONTAINER:
+ if (rc->rx_ring)
+ return rc->rx_ring->q_vector;
+ break;
+ case ICE_TX_CONTAINER:
+ if (rc->tx_ring)
+ return rc->tx_ring->q_vector;
+ break;
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+/**
+ * __ice_write_itr - write throttle rate to register
+ * @q_vector: pointer to interrupt data structure
+ * @rc: pointer to ring container
+ * @itr: throttle rate in microseconds to write
+ */
+static void __ice_write_itr(struct ice_q_vector *q_vector,
+ struct ice_ring_container *rc, u16 itr)
+{
+ struct ice_hw *hw = &q_vector->vsi->back->hw;
+
+ wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+ ITR_REG_ALIGN(itr) >> ICE_ITR_GRAN_S);
+}
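+
+/* Worked example, assuming a 2 usec ITR granularity (ICE_ITR_GRAN_S == 1):
+ * itr = 50 is first aligned by ITR_REG_ALIGN() and then written as
+ * 50 >> 1 = 25, i.e. the register counts in 2 usec units.
+ */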
+
+/**
+ * ice_write_itr - write throttle rate to queue specific register
+ * @rc: pointer to ring container
+ * @itr: throttle rate in microseconds to write
+ */
+void ice_write_itr(struct ice_ring_container *rc, u16 itr)
+{
+ struct ice_q_vector *q_vector;
+
+ q_vector = ice_pull_qvec_from_rc(rc);
+ if (!q_vector)
+ return;
+
+ __ice_write_itr(q_vector, rc, itr);
+}
+
+/**
+ * ice_set_q_vector_intrl - set up interrupt rate limiting
+ * @q_vector: the vector to be configured
+ *
+ * Interrupt rate limiting is local to the vector, not per-queue, so we must
+ * detect if either ring container has dynamic moderation enabled to decide
+ * what to set the interrupt rate limit to via INTRL settings. In the case
+ * that dynamic moderation is disabled on both, write the value with the
+ * cached setting to make sure the INTRL register matches the user-visible
+ * value.
+ */
+void ice_set_q_vector_intrl(struct ice_q_vector *q_vector)
+{
+ if (ITR_IS_DYNAMIC(&q_vector->tx) || ITR_IS_DYNAMIC(&q_vector->rx)) {
+ /* in the case where dynamic moderation is enabled, cap each
+ * vector at no more than 250,000 ints/sec (4 us), which
+ * allows low latency while staying below 500,000 interrupts
+ * per second, reducing CPU load a bit at the lowest latency
+ * setting. The 4 here is a value in microseconds.
+ */
+ ice_write_intrl(q_vector, 4);
+ } else {
+ ice_write_intrl(q_vector, q_vector->intrl);
+ }
+}
+
+/**
+ * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
+ * @vsi: the VSI being configured
+ *
+ * This configures MSIX mode interrupts for the PF VSI, and should not be used
+ * for the VF VSI.
+ */
+void ice_vsi_cfg_msix(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u16 txq = 0, rxq = 0;
+ int i, q;
+
+ ice_for_each_q_vector(vsi, i) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[i];
+ u16 reg_idx = q_vector->reg_idx;
+
+ ice_cfg_itr(hw, q_vector);
+
+ /* Both the Transmit Queue Interrupt Cause Control register
+ * and the Receive Queue Interrupt Cause Control register
+ * expect the MSIX_INDX field to be the vector index
+ * within the function space, not the absolute
+ * vector index across the PF or across the device.
+ * For SR-IOV VF VSIs the queue vector index always starts
+ * at 1, since the first vector index (0) is used for OICR
+ * in VF space. Since VMDq and other PF VSIs are within
+ * the PF function space, use the vector index that is
+ * tracked for this PF.
+ */
+ for (q = 0; q < q_vector->num_ring_tx; q++) {
+ ice_cfg_txq_interrupt(vsi, txq, reg_idx,
+ q_vector->tx.itr_idx);
+ txq++;
+ }
+
+ for (q = 0; q < q_vector->num_ring_rx; q++) {
+ ice_cfg_rxq_interrupt(vsi, rxq, reg_idx,
+ q_vector->rx.itr_idx);
+ rxq++;
+ }
+ }
+}
+
+/**
+ * ice_vsi_start_all_rx_rings - start/enable all of a VSI's Rx rings
+ * @vsi: the VSI whose rings are to be enabled
+ *
+ * Returns 0 on success and a negative value on error
+ */
+int ice_vsi_start_all_rx_rings(struct ice_vsi *vsi)
+{
+ return ice_vsi_ctrl_all_rx_rings(vsi, true);
+}
+
+/**
+ * ice_vsi_stop_all_rx_rings - stop/disable all of a VSI's Rx rings
+ * @vsi: the VSI whose rings are to be disabled
+ *
+ * Returns 0 on success and a negative value on error
+ */
+int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi)
+{
+ return ice_vsi_ctrl_all_rx_rings(vsi, false);
+}
+
+/**
+ * ice_vsi_stop_tx_rings - Disable Tx rings
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @rings: Tx ring array to be stopped
+ * @count: number of Tx ring array elements
+ */
+static int
+ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ u16 rel_vmvf_num, struct ice_tx_ring **rings, u16 count)
+{
+ u16 q_idx;
+
+ if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
+ return -EINVAL;
+
+ for (q_idx = 0; q_idx < count; q_idx++) {
+ struct ice_txq_meta txq_meta = { };
+ int status;
+
+ if (!rings || !rings[q_idx])
+ return -EINVAL;
+
+ ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
+ status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num,
+ rings[q_idx], &txq_meta);
+
+ if (status)
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vsi_stop_lan_tx_rings - Disable LAN Tx rings
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ */
+int
+ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ u16 rel_vmvf_num)
+{
+ return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq);
+}
+
+/**
+ * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings
+ * @vsi: the VSI being configured
+ */
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+{
+ return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq);
+}
+
+/**
+ * ice_vsi_is_rx_queue_active - check if any Rx queue is active
+ * @vsi: the VSI being configured
+ *
+ * Return true if at least one queue is active.
+ */
+bool ice_vsi_is_rx_queue_active(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int i;
+
+ ice_for_each_rxq(vsi, i) {
+ u32 rx_reg;
+ int pf_q;
+
+ pf_q = vsi->rxq_map[i];
+ rx_reg = rd32(hw, QRX_CTRL(pf_q));
+ if (rx_reg & QRX_CTRL_QENA_STAT_M)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_vsi_is_vlan_pruning_ena - check if VLAN pruning is enabled or not
+ * @vsi: VSI to check for VLAN pruning
+ *
+ * Returns true if Rx VLAN pruning is enabled, false otherwise.
+ */
+bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi)
+{
+ if (!vsi)
+ return false;
+
+ return (vsi->info.sw_flags2 & ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA);
+}
+
+static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
+{
+ if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
+ vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
+ vsi->tc_cfg.numtc = 1;
+ return;
+ }
+
+ /* set VSI TC information based on DCB config */
+ ice_vsi_set_dcb_tc_cfg(vsi);
+}
+
+/**
+ * ice_vsi_set_q_vectors_reg_idx - set the HW register index for all q_vectors
+ * @vsi: VSI to set the q_vectors register index on
+ */
+static int
+ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi)
+{
+ u16 i;
+
+ if (!vsi || !vsi->q_vectors)
+ return -EINVAL;
+
+ ice_for_each_q_vector(vsi, i) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+ if (!q_vector) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to set reg_idx on q_vector %d VSI %d\n",
+ i, vsi->vsi_num);
+ goto clear_reg_idx;
+ }
+
+ if (vsi->type == ICE_VSI_VF) {
+ struct ice_vf *vf = vsi->vf;
+
+ q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector);
+ } else {
+ q_vector->reg_idx =
+ q_vector->v_idx + vsi->base_vector;
+ }
+ }
+
+ return 0;
+
+clear_reg_idx:
+ ice_for_each_q_vector(vsi, i) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+ if (q_vector)
+ q_vector->reg_idx = 0;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling
+ * @vsi: the VSI being configured
+ * @tx: true for a Tx rule, false for an Rx rule
+ * @create: true to create the rule, false to remove it
+ */
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
+{
+ int (*eth_fltr)(struct ice_vsi *v, u16 type, u16 flag,
+ enum ice_sw_fwd_act_type act);
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ int status;
+
+ dev = ice_pf_to_dev(pf);
+ eth_fltr = create ? ice_fltr_add_eth : ice_fltr_remove_eth;
+
+ if (tx) {
+ status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX,
+ ICE_DROP_PACKET);
+ } else {
+ if (ice_fw_supports_lldp_fltr_ctrl(&pf->hw)) {
+ status = ice_lldp_fltr_add_remove(&pf->hw, vsi->vsi_num,
+ create);
+ } else {
+ status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX,
+ ICE_FWD_TO_VSI);
+ }
+ }
+
+ if (status)
+ dev_dbg(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n",
+ create ? "adding" : "removing", tx ? "TX" : "RX",
+ vsi->vsi_num, status);
+}
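+
+/* Illustrative call sites for the helper above: ice_vsi_setup() adds the Tx
+ * drop rule with ice_cfg_sw_lldp(vsi, true, true), and ice_vsi_release()
+ * removes it (and the Rx rule, when the FW LLDP engine is stopped) with
+ * create set to false.
+ */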
+
+/**
+ * ice_set_agg_vsi - sets up scheduler aggregator node and move VSI into it
+ * @vsi: pointer to the VSI
+ *
+ * This function will allocate a new scheduler aggregator node if needed and
+ * will move the specified VSI into it.
+ */
+static void ice_set_agg_vsi(struct ice_vsi *vsi)
+{
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ struct ice_agg_node *agg_node_iter = NULL;
+ u32 agg_id = ICE_INVALID_AGG_NODE_ID;
+ struct ice_agg_node *agg_node = NULL;
+ int node_offset, max_agg_nodes = 0;
+ struct ice_port_info *port_info;
+ struct ice_pf *pf = vsi->back;
+ u32 agg_node_id_start = 0;
+ int status;
+
+ /* create (as needed) a scheduler aggregator node and move the VSI into
+ * the corresponding aggregator node
+ * - the PF aggregator node contains VSIs of type _PF and _CTRL
+ * - VF aggregator nodes contain VF VSIs
+ */
+ port_info = pf->hw.port_info;
+ if (!port_info)
+ return;
+
+ switch (vsi->type) {
+ case ICE_VSI_CTRL:
+ case ICE_VSI_CHNL:
+ case ICE_VSI_LB:
+ case ICE_VSI_PF:
+ case ICE_VSI_SWITCHDEV_CTRL:
+ max_agg_nodes = ICE_MAX_PF_AGG_NODES;
+ agg_node_id_start = ICE_PF_AGG_NODE_ID_START;
+ agg_node_iter = &pf->pf_agg_node[0];
+ break;
+ case ICE_VSI_VF:
+ /* A user can create 'n' VFs on a given PF, but an aggregator
+ * node can hold at most 64 children. The code below handles
+ * aggregator(s) for VF VSIs: it either selects an agg_node
+ * that was already created (provided num_vsis < 64), or picks
+ * the next available node, which will then be created.
+ */
+ max_agg_nodes = ICE_MAX_VF_AGG_NODES;
+ agg_node_id_start = ICE_VF_AGG_NODE_ID_START;
+ agg_node_iter = &pf->vf_agg_node[0];
+ break;
+ default:
+ /* other VSI type, handle later if needed */
+ dev_dbg(dev, "unexpected VSI type %s\n",
+ ice_vsi_type_str(vsi->type));
+ return;
+ }
+
+ /* find the appropriate aggregator node */
+ for (node_offset = 0; node_offset < max_agg_nodes; node_offset++) {
+ /* see if we can find space in previously created
+ * node if num_vsis < 64, otherwise skip
+ */
+ if (agg_node_iter->num_vsis &&
+ agg_node_iter->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) {
+ agg_node_iter++;
+ continue;
+ }
+
+ if (agg_node_iter->valid &&
+ agg_node_iter->agg_id != ICE_INVALID_AGG_NODE_ID) {
+ agg_id = agg_node_iter->agg_id;
+ agg_node = agg_node_iter;
+ break;
+ }
+
+ /* find unclaimed agg_id */
+ if (agg_node_iter->agg_id == ICE_INVALID_AGG_NODE_ID) {
+ agg_id = node_offset + agg_node_id_start;
+ agg_node = agg_node_iter;
+ break;
+ }
+ /* move to next agg_node */
+ agg_node_iter++;
+ }
+
+ if (!agg_node)
+ return;
+
+ /* if selected aggregator node was not created, create it */
+ if (!agg_node->valid) {
+ status = ice_cfg_agg(port_info, agg_id, ICE_AGG_TYPE_AGG,
+ (u8)vsi->tc_cfg.ena_tc);
+ if (status) {
+ dev_err(dev, "unable to create aggregator node with agg_id %u\n",
+ agg_id);
+ return;
+ }
+ /* aggregator node is created, store the needed info */
+ agg_node->valid = true;
+ agg_node->agg_id = agg_id;
+ }
+
+ /* move VSI to corresponding aggregator node */
+ status = ice_move_vsi_to_agg(port_info, agg_id, vsi->idx,
+ (u8)vsi->tc_cfg.ena_tc);
+ if (status) {
+ dev_err(dev, "unable to move VSI idx %u into aggregator %u node",
+ vsi->idx, agg_id);
+ return;
+ }
+
+ /* keep active children count for aggregator node */
+ agg_node->num_vsis++;
+
+ /* cache the 'agg_id' in VSI, so that after reset - VSI will be moved
+ * to aggregator node
+ */
+ vsi->agg_node = agg_node;
+ dev_dbg(dev, "successfully moved VSI idx %u tc_bitmap 0x%x) into aggregator node %d which has num_vsis %u\n",
+ vsi->idx, vsi->tc_cfg.ena_tc, vsi->agg_node->agg_id,
+ vsi->agg_node->num_vsis);
+}
+
+/**
+ * ice_vsi_setup - Set up a VSI by a given type
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ * @vsi_type: VSI type
+ * @vf: pointer to VF to which this VSI connects. This field is used primarily
+ * for the ICE_VSI_VF type. Other VSI types should pass NULL.
+ * @ch: ptr to channel
+ *
+ * This allocates the sw VSI structure and its queue resources.
+ *
+ * Returns pointer to the successfully allocated and configured VSI sw struct on
+ * success, NULL on failure.
+ */
+struct ice_vsi *
+ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
+ enum ice_vsi_type vsi_type, struct ice_vf *vf,
+ struct ice_channel *ch)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_vsi *vsi;
+ int ret, i;
+
+ if (vsi_type == ICE_VSI_CHNL)
+ vsi = ice_vsi_alloc(pf, vsi_type, ch, NULL);
+ else if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL)
+ vsi = ice_vsi_alloc(pf, vsi_type, NULL, vf);
+ else
+ vsi = ice_vsi_alloc(pf, vsi_type, NULL, NULL);
+
+ if (!vsi) {
+ dev_err(dev, "could not allocate VSI\n");
+ return NULL;
+ }
+
+ vsi->port_info = pi;
+ vsi->vsw = pf->first_sw;
+ if (vsi->type == ICE_VSI_PF)
+ vsi->ethtype = ETH_P_PAUSE;
+
+ ice_alloc_fd_res(vsi);
+
+ if (vsi_type != ICE_VSI_CHNL) {
+ if (ice_vsi_get_qs(vsi)) {
+ dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
+ vsi->idx);
+ goto unroll_vsi_alloc;
+ }
+ }
+
+ /* set RSS capabilities */
+ ice_vsi_set_rss_params(vsi);
+
+ /* set TC configuration */
+ ice_vsi_set_tc_cfg(vsi);
+
+ /* create the VSI */
+ ret = ice_vsi_init(vsi, true);
+ if (ret)
+ goto unroll_get_qs;
+
+ ice_vsi_init_vlan_ops(vsi);
+
+ switch (vsi->type) {
+ case ICE_VSI_CTRL:
+ case ICE_VSI_SWITCHDEV_CTRL:
+ case ICE_VSI_PF:
+ ret = ice_vsi_alloc_q_vectors(vsi);
+ if (ret)
+ goto unroll_vsi_init;
+
+ ret = ice_vsi_setup_vector_base(vsi);
+ if (ret)
+ goto unroll_alloc_q_vector;
+
+ ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+ if (ret)
+ goto unroll_vector_base;
+
+ ret = ice_vsi_alloc_rings(vsi);
+ if (ret)
+ goto unroll_vector_base;
+
+ ice_vsi_map_rings_to_vectors(vsi);
+
+ /* ICE_VSI_CTRL does not need RSS so skip RSS processing */
+ if (vsi->type != ICE_VSI_CTRL)
+ /* Do not exit if configuring RSS had an issue; at
+ * least traffic can still be received on the first
+ * queue. Hence there is no need to capture the
+ * return value.
+ */
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ ice_vsi_cfg_rss_lut_key(vsi);
+ ice_vsi_set_rss_flow_fld(vsi);
+ }
+ ice_init_arfs(vsi);
+ break;
+ case ICE_VSI_CHNL:
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ ice_vsi_cfg_rss_lut_key(vsi);
+ ice_vsi_set_rss_flow_fld(vsi);
+ }
+ break;
+ case ICE_VSI_VF:
+ /* The VF driver will take care of creating the netdev for
+ * this VSI type and of mapping queues to vectors through
+ * virtchnl; the PF driver only creates the VSI and the
+ * corresponding structures for bookkeeping purposes
+ */
+ ret = ice_vsi_alloc_q_vectors(vsi);
+ if (ret)
+ goto unroll_vsi_init;
+
+ ret = ice_vsi_alloc_rings(vsi);
+ if (ret)
+ goto unroll_alloc_q_vector;
+
+ ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+ if (ret)
+ goto unroll_vector_base;
+
+ /* Do not exit if configuring RSS had an issue; at least
+ * traffic can still be received on the first queue. Hence
+ * there is no need to capture the return value.
+ */
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ ice_vsi_cfg_rss_lut_key(vsi);
+ ice_vsi_set_vf_rss_flow_fld(vsi);
+ }
+ break;
+ case ICE_VSI_LB:
+ ret = ice_vsi_alloc_rings(vsi);
+ if (ret)
+ goto unroll_vsi_init;
+ break;
+ default:
+ /* clean up the resources and exit */
+ goto unroll_vsi_init;
+ }
+
+ /* configure VSI nodes based on number of queues and TC's */
+ ice_for_each_traffic_class(i) {
+ if (!(vsi->tc_cfg.ena_tc & BIT(i)))
+ continue;
+
+ if (vsi->type == ICE_VSI_CHNL) {
+ if (!vsi->alloc_txq && vsi->num_txq)
+ max_txqs[i] = vsi->num_txq;
+ else
+ max_txqs[i] = pf->num_lan_tx;
+ } else {
+ max_txqs[i] = vsi->alloc_txq;
+ }
+ }
+
+ dev_dbg(dev, "vsi->tc_cfg.ena_tc = %d\n", vsi->tc_cfg.ena_tc);
+ ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+ if (ret) {
+ dev_err(dev, "VSI %d failed lan queue config, error %d\n",
+ vsi->vsi_num, ret);
+ goto unroll_clear_rings;
+ }
+
+ /* Add a switch rule of lookup type ETHERTYPE to drop all Tx Flow
+ * Control frames from VSIs and to restrict a malicious VF from
+ * sending out PAUSE or PFC frames. If enabled, the FW can still send
+ * FC frames. The rule is added once for the PF VSI in order to
+ * create the appropriate recipe, since the VSI/VSI list is ignored
+ * with the drop action.
+ * Also add rules to handle LLDP Tx packets. Tx LLDP packets need to
+ * be dropped so that VFs cannot send LLDP packets to reconfigure DCB
+ * settings in the HW.
+ */
+ if (!ice_is_safe_mode(pf))
+ if (vsi->type == ICE_VSI_PF) {
+ ice_fltr_add_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX,
+ ICE_DROP_PACKET);
+ ice_cfg_sw_lldp(vsi, true, true);
+ }
+
+ if (!vsi->agg_node)
+ ice_set_agg_vsi(vsi);
+ return vsi;
+
+unroll_clear_rings:
+ ice_vsi_clear_rings(vsi);
+unroll_vector_base:
+ /* reclaim SW interrupts back to the common pool */
+ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
+ pf->num_avail_sw_msix += vsi->num_q_vectors;
+unroll_alloc_q_vector:
+ ice_vsi_free_q_vectors(vsi);
+unroll_vsi_init:
+ ice_vsi_delete(vsi);
+unroll_get_qs:
+ ice_vsi_put_qs(vsi);
+unroll_vsi_alloc:
+ if (vsi_type == ICE_VSI_VF)
+ ice_enable_lag(pf->lag);
+ ice_vsi_clear(vsi);
+
+ return NULL;
+}
+
+/**
+ * ice_vsi_release_msix - Clear the queue to Interrupt mapping in HW
+ * @vsi: the VSI being cleaned up
+ */
+static void ice_vsi_release_msix(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 txq = 0;
+ u32 rxq = 0;
+ int i, q;
+
+ ice_for_each_q_vector(vsi, i) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+ ice_write_intrl(q_vector, 0);
+ for (q = 0; q < q_vector->num_ring_tx; q++) {
+ ice_write_itr(&q_vector->tx, 0);
+ wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
+ }
+ txq++;
+ }
+
+ for (q = 0; q < q_vector->num_ring_rx; q++) {
+ ice_write_itr(&q_vector->rx, 0);
+ wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0);
+ rxq++;
+ }
+ }
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_vsi_free_irq - Free the IRQ association with the OS
+ * @vsi: the VSI being configured
+ */
+void ice_vsi_free_irq(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int base = vsi->base_vector;
+ int i;
+
+ if (!vsi->q_vectors || !vsi->irqs_ready)
+ return;
+
+ ice_vsi_release_msix(vsi);
+ if (vsi->type == ICE_VSI_VF)
+ return;
+
+ vsi->irqs_ready = false;
+ ice_free_cpu_rx_rmap(vsi);
+
+ ice_for_each_q_vector(vsi, i) {
+ u16 vector = i + base;
+ int irq_num;
+
+ irq_num = pf->msix_entries[vector].vector;
+
+ /* free only the irqs that were actually requested */
+ if (!vsi->q_vectors[i] ||
+ !(vsi->q_vectors[i]->num_ring_tx ||
+ vsi->q_vectors[i]->num_ring_rx))
+ continue;
+
+ /* clear the affinity notifier in the IRQ descriptor */
+ if (!IS_ENABLED(CONFIG_RFS_ACCEL))
+ irq_set_affinity_notifier(irq_num, NULL);
+
+ /* clear the affinity_mask in the IRQ descriptor */
+ irq_set_affinity_hint(irq_num, NULL);
+ synchronize_irq(irq_num);
+ devm_free_irq(ice_pf_to_dev(pf), irq_num, vsi->q_vectors[i]);
+ }
+}
+
+/**
+ * ice_vsi_free_tx_rings - Free Tx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+void ice_vsi_free_tx_rings(struct ice_vsi *vsi)
+{
+ int i;
+
+ if (!vsi->tx_rings)
+ return;
+
+ ice_for_each_txq(vsi, i)
+ if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+ ice_free_tx_ring(vsi->tx_rings[i]);
+}
+
+/**
+ * ice_vsi_free_rx_rings - Free Rx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+void ice_vsi_free_rx_rings(struct ice_vsi *vsi)
+{
+ int i;
+
+ if (!vsi->rx_rings)
+ return;
+
+ ice_for_each_rxq(vsi, i)
+ if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
+ ice_free_rx_ring(vsi->rx_rings[i]);
+}
+
+/**
+ * ice_vsi_close - Shut down a VSI
+ * @vsi: the VSI being shut down
+ */
+void ice_vsi_close(struct ice_vsi *vsi)
+{
+ if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state))
+ ice_down(vsi);
+
+ ice_vsi_free_irq(vsi);
+ ice_vsi_free_tx_rings(vsi);
+ ice_vsi_free_rx_rings(vsi);
+}
+
+/**
+ * ice_ena_vsi - resume a VSI
+ * @vsi: the VSI being resumed
+ * @locked: is the rtnl_lock already held
+ */
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
+{
+ int err = 0;
+
+ if (!test_bit(ICE_VSI_NEEDS_RESTART, vsi->state))
+ return 0;
+
+ clear_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
+
+ if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+ if (netif_running(vsi->netdev)) {
+ if (!locked)
+ rtnl_lock();
+
+ err = ice_open_internal(vsi->netdev);
+
+ if (!locked)
+ rtnl_unlock();
+ }
+ } else if (vsi->type == ICE_VSI_CTRL) {
+ err = ice_vsi_open_ctrl(vsi);
+ }
+
+ return err;
+}
+
+/**
+ * ice_dis_vsi - pause a VSI
+ * @vsi: the VSI being paused
+ * @locked: is the rtnl_lock already held
+ */
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
+{
+ if (test_bit(ICE_VSI_DOWN, vsi->state))
+ return;
+
+ set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
+
+ if (vsi->type == ICE_VSI_PF && vsi->netdev) {
+ if (netif_running(vsi->netdev)) {
+ if (!locked)
+ rtnl_lock();
+
+ ice_vsi_close(vsi);
+
+ if (!locked)
+ rtnl_unlock();
+ } else {
+ ice_vsi_close(vsi);
+ }
+ } else if (vsi->type == ICE_VSI_CTRL ||
+ vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
+ ice_vsi_close(vsi);
+ }
+}
+
+/**
+ * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
+ * @vsi: the VSI being un-configured
+ */
+void ice_vsi_dis_irq(struct ice_vsi *vsi)
+{
+ int base = vsi->base_vector;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+ int i;
+
+ /* disable interrupt causation from each queue */
+ if (vsi->tx_rings) {
+ ice_for_each_txq(vsi, i) {
+ if (vsi->tx_rings[i]) {
+ u16 reg;
+
+ reg = vsi->tx_rings[i]->reg_idx;
+ val = rd32(hw, QINT_TQCTL(reg));
+ val &= ~QINT_TQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_TQCTL(reg), val);
+ }
+ }
+ }
+
+ if (vsi->rx_rings) {
+ ice_for_each_rxq(vsi, i) {
+ if (vsi->rx_rings[i]) {
+ u16 reg;
+
+ reg = vsi->rx_rings[i]->reg_idx;
+ val = rd32(hw, QINT_RQCTL(reg));
+ val &= ~QINT_RQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_RQCTL(reg), val);
+ }
+ }
+ }
+
+ /* disable each interrupt */
+ ice_for_each_q_vector(vsi, i) {
+ if (!vsi->q_vectors[i])
+ continue;
+ wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0);
+ }
+
+ ice_flush(hw);
+
+ /* don't call synchronize_irq() for VF's from the host */
+ if (vsi->type == ICE_VSI_VF)
+ return;
+
+ ice_for_each_q_vector(vsi, i)
+ synchronize_irq(pf->msix_entries[i + base].vector);
+}
+
+/**
+ * ice_napi_del - Remove NAPI handler for the VSI
+ * @vsi: VSI for which NAPI handler is to be removed
+ */
+void ice_napi_del(struct ice_vsi *vsi)
+{
+ int v_idx;
+
+ if (!vsi->netdev)
+ return;
+
+ ice_for_each_q_vector(vsi, v_idx)
+ netif_napi_del(&vsi->q_vectors[v_idx]->napi);
+}
+
+/**
+ * ice_free_vf_ctrl_res - Free the VF control VSI resource
+ * @pf: pointer to PF structure
+ * @vsi: the VSI to free resources for
+ *
+ * Check if the VF control VSI resource is still in use. If no VF is using it
+ * any more, release the VSI resource. Otherwise, leave it to be cleaned up
+ * once no other VF uses it.
+ */
+static void ice_free_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ rcu_read_lock();
+ ice_for_each_vf_rcu(pf, bkt, vf) {
+ if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
+ rcu_read_unlock();
+ return;
+ }
+ }
+ rcu_read_unlock();
+
+ /* No other VFs left that have control VSI. It is now safe to reclaim
+ * SW interrupts back to the common pool.
+ */
+ ice_free_res(pf->irq_tracker, vsi->base_vector,
+ ICE_RES_VF_CTRL_VEC_ID);
+ pf->num_avail_sw_msix += vsi->num_q_vectors;
+}
+
+/**
+ * ice_vsi_release - Delete a VSI and free its resources
+ * @vsi: the VSI being removed
+ *
+ * Returns 0 on success or < 0 on error
+ */
+int ice_vsi_release(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf;
+ int err;
+
+ if (!vsi->back)
+ return -ENODEV;
+ pf = vsi->back;
+
+ /* do not unregister while the driver is in the reset recovery pending
+ * state. Since reset/rebuild happens through the PF service task
+ * workqueue, it's not a good idea to unregister a netdev associated
+ * with the PF that is currently running the work queue items. This is
+ * done to avoid a check_flush_dependency() warning on this wq
+ */
+ if (vsi->netdev && !ice_is_reset_in_progress(pf->state) &&
+ (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state))) {
+ unregister_netdev(vsi->netdev);
+ clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
+ }
+
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ ice_rss_clean(vsi);
+
+ /* Disable VSI and free resources */
+ if (vsi->type != ICE_VSI_LB)
+ ice_vsi_dis_irq(vsi);
+ ice_vsi_close(vsi);
+
+ /* SR-IOV determines needed MSIX resources all at once instead of per
+ * VSI since when VFs are spawned we know how many VFs there are and how
+ * many interrupts each VF needs. SR-IOV MSIX resources are also
+ * cleared in the same manner.
+ */
+ if (vsi->type == ICE_VSI_CTRL && vsi->vf) {
+ ice_free_vf_ctrl_res(pf, vsi);
+ } else if (vsi->type != ICE_VSI_VF) {
+ /* reclaim SW interrupts back to the common pool */
+ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
+ pf->num_avail_sw_msix += vsi->num_q_vectors;
+ }
+
+ if (!ice_is_safe_mode(pf)) {
+ if (vsi->type == ICE_VSI_PF) {
+ ice_fltr_remove_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX,
+ ICE_DROP_PACKET);
+ ice_cfg_sw_lldp(vsi, true, false);
+ /* The Rx LLDP rule will only exist (and need
+ * removing) if the FW LLDP engine is currently
+ * stopped
+ */
+ if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+ ice_cfg_sw_lldp(vsi, false, false);
+ }
+ }
+
+ if (ice_is_vsi_dflt_vsi(vsi))
+ ice_clear_dflt_vsi(vsi);
+ ice_fltr_remove_all(vsi);
+ ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
+ err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to remove RDMA scheduler config for VSI %u, err %d\n",
+ vsi->vsi_num, err);
+ ice_vsi_delete(vsi);
+ ice_vsi_free_q_vectors(vsi);
+
+ if (vsi->netdev) {
+ if (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state)) {
+ unregister_netdev(vsi->netdev);
+ clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
+ }
+ if (test_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state)) {
+ free_netdev(vsi->netdev);
+ vsi->netdev = NULL;
+ clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
+ }
+ }
+
+ if (vsi->type == ICE_VSI_PF)
+ ice_devlink_destroy_pf_port(pf);
+
+ if (vsi->type == ICE_VSI_VF &&
+ vsi->agg_node && vsi->agg_node->valid)
+ vsi->agg_node->num_vsis--;
+ ice_vsi_clear_rings(vsi);
+
+ ice_vsi_put_qs(vsi);
+
+ /* retain the SW VSI data structure since it is needed to unregister
+ * and free the VSI netdev when the PF is not in the reset recovery
+ * pending state, e.g. during rmmod.
+ */
+ if (!ice_is_reset_in_progress(pf->state))
+ ice_vsi_clear(vsi);
+
+ return 0;
+}
+
+/**
+ * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors
+ * @vsi: VSI connected with q_vectors
+ * @coalesce: array of struct with stored coalesce
+ *
+ * Returns array size.
+ */
+static int
+ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi,
+ struct ice_coalesce_stored *coalesce)
+{
+ int i;
+
+ ice_for_each_q_vector(vsi, i) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+ coalesce[i].itr_tx = q_vector->tx.itr_settings;
+ coalesce[i].itr_rx = q_vector->rx.itr_settings;
+ coalesce[i].intrl = q_vector->intrl;
+
+ if (i < vsi->num_txq)
+ coalesce[i].tx_valid = true;
+ if (i < vsi->num_rxq)
+ coalesce[i].rx_valid = true;
+ }
+
+ return vsi->num_q_vectors;
+}
+
+/**
+ * ice_vsi_rebuild_set_coalesce - set coalesce from earlier saved arrays
+ * @vsi: VSI connected with q_vectors
+ * @coalesce: pointer to array of struct with stored coalesce
+ * @size: size of coalesce array
+ *
+ * Before this function, ice_vsi_rebuild_get_coalesce should be called to
+ * save the ITR params in arrays. If size is 0 or no coalesce settings were
+ * stored, the defaults are used.
+ */
+static void
+ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
+ struct ice_coalesce_stored *coalesce, int size)
+{
+ struct ice_ring_container *rc;
+ int i;
+
+ if ((size && !coalesce) || !vsi)
+ return;
+
+ /* There are a couple of cases that have to be handled here:
+ * 1. The case where the number of queue vectors stays the same, but
+ * the number of Tx or Rx rings changes (the first for loop)
+ * 2. The case where the number of queue vectors increased (the
+ * second for loop)
+ */
+ for (i = 0; i < size && i < vsi->num_q_vectors; i++) {
+ /* There are 2 cases to handle here and they are the same for
+ * both Tx and Rx:
+ * if the entry was valid previously (coalesce[i].[tr]x_valid)
+ * and the loop variable is less than the number of rings
+ * allocated, then write the previous values
+ *
+ * if the entry was not valid previously, but the number of
+ * rings is less than are allocated (this means the number of
+ * rings increased from previously), then write out the
+ * values in the first element
+ *
+ * Also, always write the ITR, even when ITR_IS_DYNAMIC is
+ * set; there is no harm because the dynamic algorithm will
+ * simply overwrite it.
+ */
+ if (i < vsi->alloc_rxq && coalesce[i].rx_valid) {
+ rc = &vsi->q_vectors[i]->rx;
+ rc->itr_settings = coalesce[i].itr_rx;
+ ice_write_itr(rc, rc->itr_setting);
+ } else if (i < vsi->alloc_rxq) {
+ rc = &vsi->q_vectors[i]->rx;
+ rc->itr_settings = coalesce[0].itr_rx;
+ ice_write_itr(rc, rc->itr_setting);
+ }
+
+ if (i < vsi->alloc_txq && coalesce[i].tx_valid) {
+ rc = &vsi->q_vectors[i]->tx;
+ rc->itr_settings = coalesce[i].itr_tx;
+ ice_write_itr(rc, rc->itr_setting);
+ } else if (i < vsi->alloc_txq) {
+ rc = &vsi->q_vectors[i]->tx;
+ rc->itr_settings = coalesce[0].itr_tx;
+ ice_write_itr(rc, rc->itr_setting);
+ }
+
+ vsi->q_vectors[i]->intrl = coalesce[i].intrl;
+ ice_set_q_vector_intrl(vsi->q_vectors[i]);
+ }
+
+ /* the number of queue vectors increased so write whatever is in
+ * the first element
+ */
+ for (; i < vsi->num_q_vectors; i++) {
+ /* transmit */
+ rc = &vsi->q_vectors[i]->tx;
+ rc->itr_settings = coalesce[0].itr_tx;
+ ice_write_itr(rc, rc->itr_setting);
+
+ /* receive */
+ rc = &vsi->q_vectors[i]->rx;
+ rc->itr_settings = coalesce[0].itr_rx;
+ ice_write_itr(rc, rc->itr_setting);
+
+ vsi->q_vectors[i]->intrl = coalesce[0].intrl;
+ ice_set_q_vector_intrl(vsi->q_vectors[i]);
+ }
+}
+
+/**
+ * ice_vsi_rebuild - Rebuild VSI after reset
+ * @vsi: VSI to be rebuilt
+ * @init_vsi: is this an initialization or a reconfigure of the VSI
+ *
+ * Returns 0 on success and negative value on failure
+ */
+int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ struct ice_coalesce_stored *coalesce;
+ int prev_num_q_vectors = 0;
+ enum ice_vsi_type vtype;
+ struct ice_pf *pf;
+ int ret, i;
+
+ if (!vsi)
+ return -EINVAL;
+
+ pf = vsi->back;
+ vtype = vsi->type;
+ if (WARN_ON(vtype == ICE_VSI_VF && !vsi->vf))
+ return -EINVAL;
+
+ ice_vsi_init_vlan_ops(vsi);
+
+ coalesce = kcalloc(vsi->num_q_vectors,
+ sizeof(struct ice_coalesce_stored), GFP_KERNEL);
+ if (!coalesce)
+ return -ENOMEM;
+
+ prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
+
+ ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
+ ret = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
+ if (ret)
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to remove RDMA scheduler config for VSI %u, err %d\n",
+ vsi->vsi_num, ret);
+ ice_vsi_free_q_vectors(vsi);
+
+ /* SR-IOV determines needed MSIX resources all at once instead of per
+ * VSI since when VFs are spawned we know how many VFs there are and how
+ * many interrupts each VF needs. SR-IOV MSIX resources are also
+ * cleared in the same manner.
+ */
+ if (vtype != ICE_VSI_VF) {
+ /* reclaim SW interrupts back to the common pool */
+ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
+ pf->num_avail_sw_msix += vsi->num_q_vectors;
+ vsi->base_vector = 0;
+ }
+
+ if (ice_is_xdp_ena_vsi(vsi))
+ /* the return value check can be skipped here; it always
+ * returns 0 if a reset is in progress
+ */
+ ice_destroy_xdp_rings(vsi);
+ ice_vsi_put_qs(vsi);
+ ice_vsi_clear_rings(vsi);
+ ice_vsi_free_arrays(vsi);
+ if (vtype == ICE_VSI_VF)
+ ice_vsi_set_num_qs(vsi, vsi->vf);
+ else
+ ice_vsi_set_num_qs(vsi, NULL);
+
+ ret = ice_vsi_alloc_arrays(vsi);
+ if (ret < 0)
+ goto err_vsi;
+
+ ice_vsi_get_qs(vsi);
+
+ ice_alloc_fd_res(vsi);
+ ice_vsi_set_tc_cfg(vsi);
+
+ /* Initialize VSI struct elements and create VSI in FW */
+ ret = ice_vsi_init(vsi, init_vsi);
+ if (ret < 0)
+ goto err_vsi;
+
+ switch (vtype) {
+ case ICE_VSI_CTRL:
+ case ICE_VSI_SWITCHDEV_CTRL:
+ case ICE_VSI_PF:
+ ret = ice_vsi_alloc_q_vectors(vsi);
+ if (ret)
+ goto err_rings;
+
+ ret = ice_vsi_setup_vector_base(vsi);
+ if (ret)
+ goto err_vectors;
+
+ ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+ if (ret)
+ goto err_vectors;
+
+ ret = ice_vsi_alloc_rings(vsi);
+ if (ret)
+ goto err_vectors;
+
+ ice_vsi_map_rings_to_vectors(vsi);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ ret = ice_vsi_determine_xdp_res(vsi);
+ if (ret)
+ goto err_vectors;
+ ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+ if (ret)
+ goto err_vectors;
+ }
+ /* ICE_VSI_CTRL does not need RSS so skip RSS processing */
+ if (vtype != ICE_VSI_CTRL)
+ /* Do not exit if configuring RSS had an issue; at
+ * least traffic can still be received on the first
+ * queue. Hence there is no need to capture the
+ * return value.
+ */
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ ice_vsi_cfg_rss_lut_key(vsi);
+
+ /* disable or enable CRC stripping */
+ if (vsi->netdev)
+ ice_vsi_cfg_crc_strip(vsi, !!(vsi->netdev->features &
+ NETIF_F_RXFCS));
+
+ break;
+ case ICE_VSI_VF:
+ ret = ice_vsi_alloc_q_vectors(vsi);
+ if (ret)
+ goto err_rings;
+
+ ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+ if (ret)
+ goto err_vectors;
+
+ ret = ice_vsi_alloc_rings(vsi);
+ if (ret)
+ goto err_vectors;
+
+ break;
+ case ICE_VSI_CHNL:
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ ice_vsi_cfg_rss_lut_key(vsi);
+ ice_vsi_set_rss_flow_fld(vsi);
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* configure VSI nodes based on the number of queues and TCs.
+ * ADQ creates VSIs for each TC/channel but doesn't allocate queues;
+ * instead it reconfigures the PF queues as per the TC command, so
+ * max_txqs should point to the PF Tx queues.
+ */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++) {
+ if (vtype == ICE_VSI_CHNL)
+ max_txqs[i] = pf->num_lan_tx;
+ else
+ max_txqs[i] = vsi->alloc_txq;
+
+ if (ice_is_xdp_ena_vsi(vsi))
+ max_txqs[i] += vsi->num_xdp_txq;
+ }
+
+ if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ /* If MQPRIO is set we are on the channel code path, hence for
+ * the main VSI use a TC count of 1
+ */
+ ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, max_txqs);
+ else
+ ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx,
+ vsi->tc_cfg.ena_tc, max_txqs);
+
+ if (ret) {
+ dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %d\n",
+ vsi->vsi_num, ret);
+ if (init_vsi) {
+ ret = -EIO;
+ goto err_vectors;
+ } else {
+ return ice_schedule_reset(pf, ICE_RESET_PFR);
+ }
+ }
+ ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
+ kfree(coalesce);
+
+ return 0;
+
+err_vectors:
+ ice_vsi_free_q_vectors(vsi);
+err_rings:
+ if (vsi->netdev) {
+ vsi->current_netdev_flags = 0;
+ unregister_netdev(vsi->netdev);
+ free_netdev(vsi->netdev);
+ vsi->netdev = NULL;
+ }
+err_vsi:
+ ice_vsi_clear(vsi);
+ set_bit(ICE_RESET_FAILED, pf->state);
+ kfree(coalesce);
+ return ret;
+}
+
+/**
+ * ice_is_reset_in_progress - check for a reset in progress
+ * @state: PF state field
+ */
+bool ice_is_reset_in_progress(unsigned long *state)
+{
+ return test_bit(ICE_RESET_OICR_RECV, state) ||
+ test_bit(ICE_PFR_REQ, state) ||
+ test_bit(ICE_CORER_REQ, state) ||
+ test_bit(ICE_GLOBR_REQ, state);
+}
+
+/**
+ * ice_wait_for_reset - Wait for driver to finish reset and rebuild
+ * @pf: pointer to the PF structure
+ * @timeout: length of time to wait, in jiffies
+ *
+ * Wait (sleep) for a short time until the driver finishes cleaning up from
+ * a device reset. The caller must be able to sleep. Use this to delay
+ * operations that could fail while the driver is cleaning up after a device
+ * reset.
+ *
+ * Returns 0 on success, -EBUSY if the reset is not finished within the
+ * timeout, and -ERESTARTSYS if the thread was interrupted.
+ */
+int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout)
+{
+ long ret;
+
+ ret = wait_event_interruptible_timeout(pf->reset_wait_queue,
+ !ice_is_reset_in_progress(pf->state),
+ timeout);
+ if (ret < 0)
+ return ret;
+ else if (!ret)
+ return -EBUSY;
+ else
+ return 0;
+}
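+
+/* Hypothetical usage sketch: a caller that must not touch the device while
+ * a reset is pending could wait up to 10 seconds for the rebuild to finish:
+ *
+ *	err = ice_wait_for_reset(pf, 10 * HZ);
+ *	if (err)
+ *		return err;
+ */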
+
+/**
+ * ice_vsi_update_q_map - update our copy of the VSI info with new queue map
+ * @vsi: VSI being configured
+ * @ctx: the context buffer returned from AQ VSI update command
+ */
+static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
+{
+ vsi->info.mapping_flags = ctx->info.mapping_flags;
+ memcpy(&vsi->info.q_mapping, &ctx->info.q_mapping,
+ sizeof(vsi->info.q_mapping));
+ memcpy(&vsi->info.tc_mapping, ctx->info.tc_mapping,
+ sizeof(vsi->info.tc_mapping));
+}
+
+/**
+ * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
+ * @vsi: the VSI being configured
+ * @ena_tc: TC map to be enabled
+ */
+void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+ struct net_device *netdev = vsi->netdev;
+ struct ice_pf *pf = vsi->back;
+ int numtc = vsi->tc_cfg.numtc;
+ struct ice_dcbx_cfg *dcbcfg;
+ u8 netdev_tc;
+ int i;
+
+ if (!netdev)
+ return;
+
+ /* A CHNL VSI doesn't have its own netdev, hence no netdev_tc */
+ if (vsi->type == ICE_VSI_CHNL)
+ return;
+
+ if (!ena_tc) {
+ netdev_reset_tc(netdev);
+ return;
+ }
+
+ if (vsi->type == ICE_VSI_PF && ice_is_adq_active(pf))
+ numtc = vsi->all_numtc;
+
+ if (netdev_set_num_tc(netdev, numtc))
+ return;
+
+ dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+ ice_for_each_traffic_class(i)
+ if (vsi->tc_cfg.ena_tc & BIT(i))
+ netdev_set_tc_queue(netdev,
+ vsi->tc_cfg.tc_info[i].netdev_tc,
+ vsi->tc_cfg.tc_info[i].qcount_tx,
+ vsi->tc_cfg.tc_info[i].qoffset);
+ /* setup TC queue map for CHNL TCs */
+ ice_for_each_chnl_tc(i) {
+ if (!(vsi->all_enatc & BIT(i)))
+ break;
+ if (!vsi->mqprio_qopt.qopt.count[i])
+ break;
+ netdev_set_tc_queue(netdev, i,
+ vsi->mqprio_qopt.qopt.count[i],
+ vsi->mqprio_qopt.qopt.offset[i]);
+ }
+
+ if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ return;
+
+ for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+ u8 ets_tc = dcbcfg->etscfg.prio_table[i];
+
+ /* Get the mapped netdev TC# for the UP */
+ netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc;
+ netdev_set_prio_tc_map(netdev, i, netdev_tc);
+ }
+}
+
+/**
+ * ice_vsi_setup_q_map_mqprio - Prepares mqprio based tc_config
+ * @vsi: the VSI being configured,
+ * @ctxt: VSI context structure
+ * @ena_tc: number of traffic classes to enable
+ *
+ * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
+ */
+static int
+ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt,
+ u8 ena_tc)
+{
+ u16 pow, offset = 0, qcount_tx = 0, qcount_rx = 0, qmap;
+ u16 tc0_offset = vsi->mqprio_qopt.qopt.offset[0];
+ int tc0_qcount = vsi->mqprio_qopt.qopt.count[0];
+ u16 new_txq, new_rxq;
+ u8 netdev_tc = 0;
+ int i;
+
+ vsi->tc_cfg.ena_tc = ena_tc ? ena_tc : 1;
+
+ pow = order_base_2(tc0_qcount);
+ qmap = ((tc0_offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+ ICE_AQ_VSI_TC_Q_OFFSET_M) |
+ ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M);
+
+ ice_for_each_traffic_class(i) {
+ if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
+ /* TC is not enabled */
+ vsi->tc_cfg.tc_info[i].qoffset = 0;
+ vsi->tc_cfg.tc_info[i].qcount_rx = 1;
+ vsi->tc_cfg.tc_info[i].qcount_tx = 1;
+ vsi->tc_cfg.tc_info[i].netdev_tc = 0;
+ ctxt->info.tc_mapping[i] = 0;
+ continue;
+ }
+
+ offset = vsi->mqprio_qopt.qopt.offset[i];
+ qcount_rx = vsi->mqprio_qopt.qopt.count[i];
+ qcount_tx = vsi->mqprio_qopt.qopt.count[i];
+ vsi->tc_cfg.tc_info[i].qoffset = offset;
+ vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx;
+ vsi->tc_cfg.tc_info[i].qcount_tx = qcount_tx;
+ vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
+ }
+
+ if (vsi->all_numtc && vsi->all_numtc != vsi->tc_cfg.numtc) {
+ ice_for_each_chnl_tc(i) {
+ if (!(vsi->all_enatc & BIT(i)))
+ continue;
+ offset = vsi->mqprio_qopt.qopt.offset[i];
+ qcount_rx = vsi->mqprio_qopt.qopt.count[i];
+ qcount_tx = vsi->mqprio_qopt.qopt.count[i];
+ }
+ }
+
+ new_txq = offset + qcount_tx;
+ if (new_txq > vsi->alloc_txq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u) than were allocated (%u)!\n",
+ new_txq, vsi->alloc_txq);
+ return -EINVAL;
+ }
+
+ new_rxq = offset + qcount_rx;
+ if (new_rxq > vsi->alloc_rxq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u) than were allocated (%u)!\n",
+ new_rxq, vsi->alloc_rxq);
+ return -EINVAL;
+ }
+
+ /* Set actual Tx/Rx queue pairs */
+ vsi->num_txq = new_txq;
+ vsi->num_rxq = new_rxq;
+
+ /* Setup queue TC[0].qmap for given VSI context */
+ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+ ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
+ ctxt->info.q_mapping[1] = cpu_to_le16(tc0_qcount);
+
+ /* Find queue count available for channel VSIs and starting offset
+ * for channel VSIs
+ */
+ if (tc0_qcount && tc0_qcount < vsi->num_rxq) {
+ vsi->cnt_q_avail = vsi->num_rxq - tc0_qcount;
+ vsi->next_base_q = tc0_qcount;
+ }
+ dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_txq = %d\n", vsi->num_txq);
+ dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_rxq = %d\n", vsi->num_rxq);
+ dev_dbg(ice_pf_to_dev(vsi->back), "all_numtc %u, all_enatc: 0x%04x, tc_cfg.numtc %u\n",
+ vsi->all_numtc, vsi->all_enatc, vsi->tc_cfg.numtc);
+
+ return 0;
+}
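+
+/* Worked example of the TC 0 qmap encoding above (illustrative values): with
+ * tc0_offset = 0 and tc0_qcount = 4, order_base_2(4) = 2, so
+ *
+ *	qmap = (0 << ICE_AQ_VSI_TC_Q_OFFSET_S) | (2 << ICE_AQ_VSI_TC_Q_NUM_S);
+ *
+ * i.e. "start at queue 0 and use 2^2 = 4 contiguous queues" for TC 0.
+ */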
+
+/**
+ * ice_vsi_cfg_tc - Configure VSI Tx Sched for given TC map
+ * @vsi: VSI to be configured
+ * @ena_tc: TC bitmap
+ *
+ * VSI queues are expected to be quiesced before calling this function.
+ */
+int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ struct ice_pf *pf = vsi->back;
+ struct ice_tc_cfg old_tc_cfg;
+ struct ice_vsi_ctx *ctx;
+ struct device *dev;
+ int i, ret = 0;
+ u8 num_tc = 0;
+
+ dev = ice_pf_to_dev(pf);
+ if (vsi->tc_cfg.ena_tc == ena_tc &&
+ vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
+ return ret;
+
+ ice_for_each_traffic_class(i) {
+ /* build bitmap of enabled TCs */
+ if (ena_tc & BIT(i))
+ num_tc++;
+ /* populate max_txqs per TC */
+ max_txqs[i] = vsi->alloc_txq;
+ /* For a CHNL VSI, alloc_txq/alloc_rxq are zero, so use
+ * num_txq as max_txqs instead
+ */
+ if (vsi->type == ICE_VSI_CHNL &&
+ test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ max_txqs[i] = vsi->num_txq;
+ }
+
+ memcpy(&old_tc_cfg, &vsi->tc_cfg, sizeof(old_tc_cfg));
+ vsi->tc_cfg.ena_tc = ena_tc;
+ vsi->tc_cfg.numtc = num_tc;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->vf_num = 0;
+ ctx->info = vsi->info;
+
+ if (vsi->type == ICE_VSI_PF &&
+ test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ ret = ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc);
+ else
+ ret = ice_vsi_setup_q_map(vsi, ctx);
+
+ if (ret) {
+ memcpy(&vsi->tc_cfg, &old_tc_cfg, sizeof(vsi->tc_cfg));
+ goto out;
+ }
+
+ /* must indicate which sections of the VSI context are being modified */
+ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+ ret = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
+ if (ret) {
+ dev_info(dev, "Failed VSI Update\n");
+ goto out;
+ }
+
+ if (vsi->type == ICE_VSI_PF &&
+ test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, max_txqs);
+ else
+ ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx,
+ vsi->tc_cfg.ena_tc, max_txqs);
+
+ if (ret) {
+ dev_err(dev, "VSI %d failed TC config, error %d\n",
+ vsi->vsi_num, ret);
+ goto out;
+ }
+ ice_vsi_update_q_map(vsi, ctx);
+ vsi->info.valid_sections = 0;
+
+ ice_vsi_cfg_netdev_tc(vsi, ena_tc);
+out:
+ kfree(ctx);
+ return ret;
+}
+
+/**
+ * ice_update_ring_stats - Update ring statistics
+ * @stats: stats to be updated
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ *
+ * This function assumes that caller has acquired a u64_stats_sync lock.
+ */
+static void ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes)
+{
+ stats->bytes += bytes;
+ stats->pkts += pkts;
+}
+
+/**
+ * ice_update_tx_ring_stats - Update Tx ring specific counters
+ * @tx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes)
+{
+ u64_stats_update_begin(&tx_ring->syncp);
+ ice_update_ring_stats(&tx_ring->stats, pkts, bytes);
+ u64_stats_update_end(&tx_ring->syncp);
+}
+
+/**
+ * ice_update_rx_ring_stats - Update Rx ring specific counters
+ * @rx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes)
+{
+ u64_stats_update_begin(&rx_ring->syncp);
+ ice_update_ring_stats(&rx_ring->stats, pkts, bytes);
+ u64_stats_update_end(&rx_ring->syncp);
+}
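+
+/* The two writers above rely on the ring's u64_stats_sync (syncp) being held
+ * via u64_stats_update_begin/end. A matching reader-side sketch, assuming
+ * the standard u64_stats fetch API (illustrative, not part of the driver):
+ *
+ *	unsigned int start;
+ *	u64 pkts, bytes;
+ *
+ *	do {
+ *		start = u64_stats_fetch_begin(&tx_ring->syncp);
+ *		pkts = tx_ring->stats.pkts;
+ *		bytes = tx_ring->stats.bytes;
+ *	} while (u64_stats_fetch_retry(&tx_ring->syncp, start));
+ */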
+
+/**
+ * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used
+ * @pi: port info of the switch with default VSI
+ *
+ * Return true if there is a single VSI in the default forwarding VSI list
+ */
+bool ice_is_dflt_vsi_in_use(struct ice_port_info *pi)
+{
+ bool exists = false;
+
+ ice_check_if_dflt_vsi(pi, 0, &exists);
+ return exists;
+}
+
+/**
+ * ice_is_vsi_dflt_vsi - check if the VSI passed in is the default VSI
+ * @vsi: VSI to compare against default forwarding VSI
+ *
+ * Return true if the VSI passed in is the default forwarding VSI, false
+ * otherwise.
+ */
+bool ice_is_vsi_dflt_vsi(struct ice_vsi *vsi)
+{
+ return ice_check_if_dflt_vsi(vsi->port_info, vsi->idx, NULL);
+}
+
+/**
+ * ice_set_dflt_vsi - set the default forwarding VSI
+ * @vsi: VSI getting set as the default forwarding VSI on the switch
+ *
+ * If the VSI passed in is already the default VSI, just return success.
+ *
+ * Otherwise try to set the VSI passed in as the switch's default VSI and
+ * return the result.
+ */
+int ice_set_dflt_vsi(struct ice_vsi *vsi)
+{
+ struct device *dev;
+ int status;
+
+ if (!vsi)
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(vsi->back);
+
+ /* the VSI passed in is already the default VSI */
+ if (ice_is_vsi_dflt_vsi(vsi)) {
+ dev_dbg(dev, "VSI %d passed in is already the default forwarding VSI, nothing to do\n",
+ vsi->vsi_num);
+ return 0;
+ }
+
+ status = ice_cfg_dflt_vsi(vsi->port_info, vsi->idx, true, ICE_FLTR_RX);
+ if (status) {
+ dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %d\n",
+ vsi->vsi_num, status);
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_clear_dflt_vsi - clear the default forwarding VSI
+ * @vsi: VSI to remove from filter list
+ *
+ * If the switch has no default VSI configured, return an error.
+ *
+ * Otherwise try to clear the default VSI and return the result.
+ */
+int ice_clear_dflt_vsi(struct ice_vsi *vsi)
+{
+ struct device *dev;
+ int status;
+
+ if (!vsi)
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(vsi->back);
+
+ /* there is no default VSI configured */
+ if (!ice_is_dflt_vsi_in_use(vsi->port_info))
+ return -ENODEV;
+
+ status = ice_cfg_dflt_vsi(vsi->port_info, vsi->idx, false,
+ ICE_FLTR_RX);
+ if (status) {
+ dev_err(dev, "Failed to clear the default forwarding VSI %d, error %d\n",
+ vsi->vsi_num, status);
+ return -EIO;
+ }
+
+ return 0;
+}
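+
+/* Illustrative pairing of the helpers above, mirroring the promiscuous-mode
+ * path in ice_main.c (a sketch only):
+ *
+ *	if (!ice_is_dflt_vsi_in_use(vsi->port_info))
+ *		err = ice_set_dflt_vsi(vsi);
+ *	...
+ *	if (ice_is_vsi_dflt_vsi(vsi))
+ *		err = ice_clear_dflt_vsi(vsi);
+ */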
+
+/**
+ * ice_get_link_speed_mbps - get link speed in Mbps
+ * @vsi: the VSI whose link speed is being queried
+ *
+ * Return the current VSI link speed in Mbps, or 0 if the speed is unknown.
+ */
+int ice_get_link_speed_mbps(struct ice_vsi *vsi)
+{
+ switch (vsi->port_info->phy.link_info.link_speed) {
+ case ICE_AQ_LINK_SPEED_100GB:
+ return SPEED_100000;
+ case ICE_AQ_LINK_SPEED_50GB:
+ return SPEED_50000;
+ case ICE_AQ_LINK_SPEED_40GB:
+ return SPEED_40000;
+ case ICE_AQ_LINK_SPEED_25GB:
+ return SPEED_25000;
+ case ICE_AQ_LINK_SPEED_20GB:
+ return SPEED_20000;
+ case ICE_AQ_LINK_SPEED_10GB:
+ return SPEED_10000;
+ case ICE_AQ_LINK_SPEED_5GB:
+ return SPEED_5000;
+ case ICE_AQ_LINK_SPEED_2500MB:
+ return SPEED_2500;
+ case ICE_AQ_LINK_SPEED_1000MB:
+ return SPEED_1000;
+ case ICE_AQ_LINK_SPEED_100MB:
+ return SPEED_100;
+ case ICE_AQ_LINK_SPEED_10MB:
+ return SPEED_10;
+ case ICE_AQ_LINK_SPEED_UNKNOWN:
+ default:
+ return 0;
+ }
+}
+
+/**
+ * ice_get_link_speed_kbps - get link speed in Kbps
+ * @vsi: the VSI whose link speed is being queried
+ *
+ * Return the current VSI link speed in Kbps, or 0 if the speed is unknown.
+ */
+int ice_get_link_speed_kbps(struct ice_vsi *vsi)
+{
+ int speed_mbps;
+
+ speed_mbps = ice_get_link_speed_mbps(vsi);
+
+ return speed_mbps * 1000;
+}
+
+/**
+ * ice_set_min_bw_limit - setup minimum BW limit for Tx based on min_tx_rate
+ * @vsi: VSI to be configured
+ * @min_tx_rate: min Tx rate in Kbps to be configured as BW limit
+ *
+ * If min_tx_rate is 0, clear the minimum BW limit profile; otherwise the
+ * non-zero value is enforced as a minimum BW limit for the VSI on TC 0.
+ */
+int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ int status;
+ int speed;
+
+ dev = ice_pf_to_dev(pf);
+ if (!vsi->port_info) {
+ dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n",
+ vsi->idx, vsi->type);
+ return -EINVAL;
+ }
+
+ speed = ice_get_link_speed_kbps(vsi);
+ if (min_tx_rate > (u64)speed) {
+ dev_err(dev, "invalid min Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n",
+ min_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx,
+ speed);
+ return -EINVAL;
+ }
+
+ /* Configure min BW for VSI limit */
+ if (min_tx_rate) {
+ status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0,
+ ICE_MIN_BW, min_tx_rate);
+ if (status) {
+ dev_err(dev, "failed to set min Tx rate(%llu Kbps) for %s %d\n",
+ min_tx_rate, ice_vsi_type_str(vsi->type),
+ vsi->idx);
+ return status;
+ }
+
+ dev_dbg(dev, "set min Tx rate(%llu Kbps) for %s\n",
+ min_tx_rate, ice_vsi_type_str(vsi->type));
+ } else {
+ status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info,
+ vsi->idx, 0,
+ ICE_MIN_BW);
+ if (status) {
+ dev_err(dev, "failed to clear min Tx rate configuration for %s %d\n",
+ ice_vsi_type_str(vsi->type), vsi->idx);
+ return status;
+ }
+
+ dev_dbg(dev, "cleared min Tx rate configuration for %s %d\n",
+ ice_vsi_type_str(vsi->type), vsi->idx);
+ }
+
+ return 0;
+}
+
+/**
+ * ice_set_max_bw_limit - setup maximum BW limit for Tx based on max_tx_rate
+ * @vsi: VSI to be configured
+ * @max_tx_rate: max Tx rate in Kbps to be configured as BW limit
+ *
+ * If max_tx_rate is 0, clear the maximum BW limit profile; otherwise the
+ * non-zero value is enforced as a maximum BW limit for the VSI on TC 0.
+ */
+int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ int status;
+ int speed;
+
+ dev = ice_pf_to_dev(pf);
+ if (!vsi->port_info) {
+ dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n",
+ vsi->idx, vsi->type);
+ return -EINVAL;
+ }
+
+ speed = ice_get_link_speed_kbps(vsi);
+ if (max_tx_rate > (u64)speed) {
+ dev_err(dev, "invalid max Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n",
+ max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx,
+ speed);
+ return -EINVAL;
+ }
+
+ /* Configure max BW for VSI limit */
+ if (max_tx_rate) {
+ status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0,
+ ICE_MAX_BW, max_tx_rate);
+ if (status) {
+ dev_err(dev, "failed setting max Tx rate(%llu Kbps) for %s %d\n",
+ max_tx_rate, ice_vsi_type_str(vsi->type),
+ vsi->idx);
+ return status;
+ }
+
+ dev_dbg(dev, "set max Tx rate(%llu Kbps) for %s %d\n",
+ max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx);
+ } else {
+ status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info,
+ vsi->idx, 0,
+ ICE_MAX_BW);
+ if (status) {
+ dev_err(dev, "failed clearing max Tx rate configuration for %s %d\n",
+ ice_vsi_type_str(vsi->type), vsi->idx);
+ return status;
+ }
+
+ dev_dbg(dev, "cleared max Tx rate configuration for %s %d\n",
+ ice_vsi_type_str(vsi->type), vsi->idx);
+ }
+
+ return 0;
+}
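+
+/* Illustrative usage (a sketch, not part of the driver): pinning a VSI
+ * between a 100 Mbps floor and a 1 Gbps ceiling. Both helpers take rates in
+ * Kbps, and a rate of 0 clears the corresponding limit:
+ *
+ *	err = ice_set_min_bw_limit(vsi, 100000);
+ *	if (!err)
+ *		err = ice_set_max_bw_limit(vsi, 1000000);
+ */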
+
+/**
+ * ice_set_link - turn on/off physical link
+ * @vsi: VSI to modify physical link on
+ * @ena: turn on/off physical link
+ */
+int ice_set_link(struct ice_vsi *vsi, bool ena)
+{
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ struct ice_port_info *pi = vsi->port_info;
+ struct ice_hw *hw = pi->hw;
+ int status;
+
+ if (vsi->type != ICE_VSI_PF)
+ return -EINVAL;
+
+ status = ice_aq_set_link_restart_an(pi, ena, NULL);
+
+ /* If the link is owned by manageability, FW will return ICE_AQ_RC_EMODE.
+ * This is not a fatal error, so log a debug message and return a
+ * success code. Return an error if FW returns an error code other
+ * than ICE_AQ_RC_EMODE.
+ */
+ if (status == -EIO) {
+ if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
+ dev_dbg(dev, "can't set link to %s, err %d aq_err %s. not fatal, continuing\n",
+ (ena ? "ON" : "OFF"), status,
+ ice_aq_str(hw->adminq.sq_last_status));
+ } else if (status) {
+ dev_err(dev, "can't set link to %s, err %d aq_err %s\n",
+ (ena ? "ON" : "OFF"), status,
+ ice_aq_str(hw->adminq.sq_last_status));
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vsi_add_vlan_zero - add VLAN 0 filter(s) for this VSI
+ * @vsi: VSI used to add VLAN filters
+ *
+ * In Single VLAN Mode (SVM), single VLAN filters via ICE_SW_LKUP_VLAN are based
+ * on the inner VLAN ID, so the VLAN TPID (i.e. 0x8100 or 0x88a8) doesn't
+ * matter. In Double VLAN Mode (DVM), outer/single VLAN filters via
+ * ICE_SW_LKUP_VLAN are based on the outer/single VLAN ID + VLAN TPID.
+ *
+ * For both modes add a VLAN 0 + no VLAN TPID filter to handle untagged traffic
+ * when VLAN pruning is enabled. Also, this handles VLAN 0 priority tagged
+ * traffic in SVM, since the VLAN TPID isn't part of filtering.
+ *
+ * If DVM is enabled then an explicit VLAN 0 + VLAN TPID filter needs to be
+ * added to allow VLAN 0 priority tagged traffic in DVM, since the VLAN TPID is
+ * part of filtering.
+ */
+int ice_vsi_add_vlan_zero(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ struct ice_vlan vlan;
+ int err;
+
+ vlan = ICE_VLAN(0, 0, 0);
+ err = vlan_ops->add_vlan(vsi, &vlan);
+ if (err && err != -EEXIST)
+ return err;
+
+ /* in SVM both VLAN 0 filters are identical */
+ if (!ice_is_dvm_ena(&vsi->back->hw))
+ return 0;
+
+ vlan = ICE_VLAN(ETH_P_8021Q, 0, 0);
+ err = vlan_ops->add_vlan(vsi, &vlan);
+ if (err && err != -EEXIST)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_vsi_del_vlan_zero - delete VLAN 0 filter(s) for this VSI
+ * @vsi: VSI used to add VLAN filters
+ *
+ * Delete the VLAN 0 filters in the same manner that they were added in
+ * ice_vsi_add_vlan_zero.
+ */
+int ice_vsi_del_vlan_zero(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ struct ice_vlan vlan;
+ int err;
+
+ vlan = ICE_VLAN(0, 0, 0);
+ err = vlan_ops->del_vlan(vsi, &vlan);
+ if (err && err != -EEXIST)
+ return err;
+
+ /* in SVM both VLAN 0 filters are identical */
+ if (!ice_is_dvm_ena(&vsi->back->hw))
+ return 0;
+
+ vlan = ICE_VLAN(ETH_P_8021Q, 0, 0);
+ err = vlan_ops->del_vlan(vsi, &vlan);
+ if (err && err != -EEXIST)
+ return err;
+
+ /* when deleting the last VLAN filter, make sure to disable VLAN
+ * promiscuous mode so it isn't left enabled by accident
+ */
+ return ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+ ICE_MCAST_VLAN_PROMISC_BITS, 0);
+}
+
+/**
+ * ice_vsi_num_zero_vlans - get number of VLAN 0 filters based on VLAN mode
+ * @vsi: VSI used to get the VLAN mode
+ *
+ * If DVM is enabled then 2 VLAN 0 filters are added, else if SVM is enabled
+ * then 1 VLAN 0 filter is added. See ice_vsi_add_vlan_zero for more details.
+ */
+static u16 ice_vsi_num_zero_vlans(struct ice_vsi *vsi)
+{
+#define ICE_DVM_NUM_ZERO_VLAN_FLTRS 2
+#define ICE_SVM_NUM_ZERO_VLAN_FLTRS 1
+ /* no VLAN 0 filter is created when a port VLAN is active */
+ if (vsi->type == ICE_VSI_VF) {
+ if (WARN_ON(!vsi->vf))
+ return 0;
+
+ if (ice_vf_is_port_vlan_ena(vsi->vf))
+ return 0;
+ }
+
+ if (ice_is_dvm_ena(&vsi->back->hw))
+ return ICE_DVM_NUM_ZERO_VLAN_FLTRS;
+ else
+ return ICE_SVM_NUM_ZERO_VLAN_FLTRS;
+}
+
+/**
+ * ice_vsi_has_non_zero_vlans - check if VSI has any non-zero VLANs
+ * @vsi: VSI used to determine if any non-zero VLANs have been added
+ */
+bool ice_vsi_has_non_zero_vlans(struct ice_vsi *vsi)
+{
+ return (vsi->num_vlan > ice_vsi_num_zero_vlans(vsi));
+}
+
+/**
+ * ice_vsi_num_non_zero_vlans - get the number of non-zero VLANs for this VSI
+ * @vsi: VSI used to get the number of non-zero VLANs added
+ */
+u16 ice_vsi_num_non_zero_vlans(struct ice_vsi *vsi)
+{
+ return (vsi->num_vlan - ice_vsi_num_zero_vlans(vsi));
+}
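+
+/* Worked example (illustrative, assuming vsi->num_vlan counts every filter
+ * added, including both VLAN 0 entries): on a DVM-enabled VSI carrying VLANs
+ * 10 and 20 in addition to the two VLAN 0 filters from
+ * ice_vsi_add_vlan_zero, vsi->num_vlan is 4, ice_vsi_num_zero_vlans()
+ * returns 2, so ice_vsi_num_non_zero_vlans() returns 2 and
+ * ice_vsi_has_non_zero_vlans() returns true.
+ */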
+
+/**
+ * ice_is_feature_supported - check if a feature is supported by the PF
+ * @pf: pointer to the struct ice_pf instance
+ * @f: feature enum to be checked
+ *
+ * Returns true if the feature is supported, false otherwise
+ */
+bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f)
+{
+ if (f < 0 || f >= ICE_F_MAX)
+ return false;
+
+ return test_bit(f, pf->features);
+}
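+
+/* Illustrative usage (a sketch): callers gate optional hardware blocks on
+ * the feature bit, e.g. the GNSS teardown done during reset preparation:
+ *
+ *	if (ice_is_feature_supported(pf, ICE_F_GNSS))
+ *		ice_gnss_exit(pf);
+ */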
+
+/**
+ * ice_set_feature_support - mark a feature as supported
+ * @pf: pointer to the struct ice_pf instance
+ * @f: feature enum to set
+ */
+static void ice_set_feature_support(struct ice_pf *pf, enum ice_feature f)
+{
+ if (f < 0 || f >= ICE_F_MAX)
+ return;
+
+ set_bit(f, pf->features);
+}
+
+/**
+ * ice_clear_feature_support - mark a feature as unsupported
+ * @pf: pointer to the struct ice_pf instance
+ * @f: feature enum to clear
+ */
+void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f)
+{
+ if (f < 0 || f >= ICE_F_MAX)
+ return;
+
+ clear_bit(f, pf->features);
+}
+
+/**
+ * ice_init_feature_support
+ * @pf: pointer to the struct ice_pf instance
+ *
+ * called during init to set up the supported features
+ */
+void ice_init_feature_support(struct ice_pf *pf)
+{
+ switch (pf->hw.device_id) {
+ case ICE_DEV_ID_E810C_BACKPLANE:
+ case ICE_DEV_ID_E810C_QSFP:
+ case ICE_DEV_ID_E810C_SFP:
+ ice_set_feature_support(pf, ICE_F_DSCP);
+ ice_set_feature_support(pf, ICE_F_PTP_EXTTS);
+ if (ice_is_e810t(&pf->hw)) {
+ ice_set_feature_support(pf, ICE_F_SMA_CTRL);
+ if (ice_gnss_is_gps_present(&pf->hw))
+ ice_set_feature_support(pf, ICE_F_GNSS);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * ice_vsi_update_security - update security block in VSI
+ * @vsi: pointer to VSI structure
+ * @fill: function pointer to fill ctx
+ */
+int
+ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *))
+{
+ struct ice_vsi_ctx ctx = { 0 };
+
+ ctx.info = vsi->info;
+ ctx.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+ fill(&ctx);
+
+ if (ice_update_vsi(&vsi->back->hw, vsi->idx, &ctx, NULL))
+ return -ENODEV;
+
+ vsi->info = ctx.info;
+ return 0;
+}
+
+/**
+ * ice_vsi_ctx_set_antispoof - set antispoof function in VSI ctx
+ * @ctx: pointer to VSI ctx structure
+ */
+void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx)
+{
+ ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
+ (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+}
+
+/**
+ * ice_vsi_ctx_clear_antispoof - clear antispoof function in VSI ctx
+ * @ctx: pointer to VSI ctx structure
+ */
+void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx)
+{
+ ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF &
+ ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+}
+
+/**
+ * ice_vsi_ctx_set_allow_override - allow destination override on VSI
+ * @ctx: pointer to VSI ctx structure
+ */
+void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx)
+{
+ ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
+}
+
+/**
+ * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI
+ * @ctx: pointer to VSI ctx structure
+ */
+void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx)
+{
+ ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
+}
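+
+/* The four helpers above are fill callbacks meant to be composed with
+ * ice_vsi_update_security(). An illustrative call enabling antispoof on a
+ * VSI (a sketch, not a definitive call site):
+ *
+ *	err = ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
+ */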
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
new file mode 100644
index 000000000..dcdf69a69
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_LIB_H_
+#define _ICE_LIB_H_
+
+#include "ice.h"
+#include "ice_vlan.h"
+
+const char *ice_vsi_type_str(enum ice_vsi_type vsi_type);
+
+bool ice_pf_state_is_nominal(struct ice_pf *pf);
+
+void ice_update_eth_stats(struct ice_vsi *vsi);
+
+int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx);
+
+int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx);
+
+int ice_vsi_cfg_rxqs(struct ice_vsi *vsi);
+
+int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
+
+void ice_vsi_cfg_msix(struct ice_vsi *vsi);
+
+int ice_vsi_start_all_rx_rings(struct ice_vsi *vsi);
+
+int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi);
+
+int
+ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ u16 rel_vmvf_num);
+
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
+
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
+
+bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi);
+
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
+
+int ice_set_link(struct ice_vsi *vsi, bool ena);
+
+void ice_vsi_delete(struct ice_vsi *vsi);
+int ice_vsi_clear(struct ice_vsi *vsi);
+
+int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc);
+
+int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi);
+
+void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);
+
+struct ice_vsi *
+ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
+ enum ice_vsi_type vsi_type, struct ice_vf *vf,
+ struct ice_channel *ch);
+
+void ice_napi_del(struct ice_vsi *vsi);
+
+int ice_vsi_release(struct ice_vsi *vsi);
+
+void ice_vsi_close(struct ice_vsi *vsi);
+
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked);
+
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked);
+
+int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id);
+
+int
+ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id);
+
+int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi);
+
+bool ice_is_reset_in_progress(unsigned long *state);
+int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout);
+
+void
+ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
+ bool ena_ts);
+
+void ice_vsi_dis_irq(struct ice_vsi *vsi);
+
+void ice_vsi_free_irq(struct ice_vsi *vsi);
+
+void ice_vsi_free_rx_rings(struct ice_vsi *vsi);
+
+void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
+
+void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
+
+void ice_vsi_cfg_crc_strip(struct ice_vsi *vsi, bool disable);
+
+void ice_update_tx_ring_stats(struct ice_tx_ring *ring, u64 pkts, u64 bytes);
+
+void ice_update_rx_ring_stats(struct ice_rx_ring *ring, u64 pkts, u64 bytes);
+
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
+void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl);
+void ice_write_itr(struct ice_ring_container *rc, u16 itr);
+void ice_set_q_vector_intrl(struct ice_q_vector *q_vector);
+
+int ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set);
+
+bool ice_is_safe_mode(struct ice_pf *pf);
+bool ice_is_rdma_ena(struct ice_pf *pf);
+bool ice_is_dflt_vsi_in_use(struct ice_port_info *pi);
+bool ice_is_vsi_dflt_vsi(struct ice_vsi *vsi);
+int ice_set_dflt_vsi(struct ice_vsi *vsi);
+int ice_clear_dflt_vsi(struct ice_vsi *vsi);
+int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate);
+int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate);
+int ice_get_link_speed_kbps(struct ice_vsi *vsi);
+int ice_get_link_speed_mbps(struct ice_vsi *vsi);
+int
+ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *));
+
+void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx);
+
+void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx);
+
+void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx);
+
+void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx);
+int ice_vsi_add_vlan_zero(struct ice_vsi *vsi);
+int ice_vsi_del_vlan_zero(struct ice_vsi *vsi);
+bool ice_vsi_has_non_zero_vlans(struct ice_vsi *vsi);
+u16 ice_vsi_num_non_zero_vlans(struct ice_vsi *vsi);
+bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f);
+void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f);
+void ice_init_feature_support(struct ice_pf *pf);
+bool ice_vsi_is_rx_queue_active(struct ice_vsi *vsi);
+#endif /* !_ICE_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
new file mode 100644
index 000000000..ab46cfca4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -0,0 +1,9194 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* Intel(R) Ethernet Connection E800 Series Linux Driver */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <generated/utsrelease.h>
+#include <linux/crash_dump.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+#include "ice_devlink.h"
+/* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
+ * ice tracepoint functions. This must be done exactly once across the
+ * ice driver.
+ */
+#define CREATE_TRACE_POINTS
+#include "ice_trace.h"
+#include "ice_eswitch.h"
+#include "ice_tc_lib.h"
+#include "ice_vsi_vlan_ops.h"
+
+#define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver"
+static const char ice_driver_string[] = DRV_SUMMARY;
+static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
+
+/* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
+#define ICE_DDP_PKG_PATH "intel/ice/ddp/"
+#define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg"
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL v2");
+MODULE_FIRMWARE(ICE_DDP_PKG_FILE);
+
+static int debug = -1;
+module_param(debug, int, 0644);
+#ifndef CONFIG_DYNAMIC_DEBUG
+MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
+#else
+MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+
+static DEFINE_IDA(ice_aux_ida);
+DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
+EXPORT_SYMBOL(ice_xdp_locking_key);
+
+/**
+ * ice_hw_to_dev - Get device pointer from the hardware structure
+ * @hw: pointer to the device HW structure
+ *
+ * Used to access the device pointer from compilation units which can't easily
+ * include the definition of struct ice_pf without leading to circular header
+ * dependencies.
+ */
+struct device *ice_hw_to_dev(struct ice_hw *hw)
+{
+ struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
+
+ return &pf->pdev->dev;
+}
+
+static struct workqueue_struct *ice_wq;
+static const struct net_device_ops ice_netdev_safe_mode_ops;
+static const struct net_device_ops ice_netdev_ops;
+
+static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
+
+static void ice_vsi_release_all(struct ice_pf *pf);
+
+static int ice_rebuild_channels(struct ice_pf *pf);
+static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);
+
+static int
+ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
+ void *cb_priv, enum tc_setup_type type, void *type_data,
+ void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb));
+
+bool netif_is_ice(struct net_device *dev)
+{
+ return dev && (dev->netdev_ops == &ice_netdev_ops);
+}
+
+/**
+ * ice_get_tx_pending - returns number of Tx descriptors not processed
+ * @ring: the ring of descriptors
+ */
+static u16 ice_get_tx_pending(struct ice_tx_ring *ring)
+{
+ u16 head, tail;
+
+ head = ring->next_to_clean;
+ tail = ring->next_to_use;
+
+ if (head != tail)
+ return (head < tail) ?
+ tail - head : (tail + ring->count - head);
+ return 0;
+}
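+
+/* Worked example (illustrative): on a 256-descriptor ring with
+ * next_to_clean = 250 and next_to_use = 4, head > tail, so the pending
+ * count is tail + ring->count - head = 4 + 256 - 250 = 10 descriptors.
+ */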
+
+/**
+ * ice_check_for_hang_subtask - check for and recover hung queues
+ * @pf: pointer to PF struct
+ */
+static void ice_check_for_hang_subtask(struct ice_pf *pf)
+{
+ struct ice_vsi *vsi = NULL;
+ struct ice_hw *hw;
+ unsigned int i;
+ int packets;
+ u32 v;
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
+ vsi = pf->vsi[v];
+ break;
+ }
+
+ if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state))
+ return;
+
+ if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
+ return;
+
+ hw = &vsi->back->hw;
+
+ ice_for_each_txq(vsi, i) {
+ struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
+
+ if (!tx_ring)
+ continue;
+ if (ice_ring_ch_enabled(tx_ring))
+ continue;
+
+ if (tx_ring->desc) {
+ /* If the packet counter has not changed, the queue is
+ * likely stalled, so force an interrupt for this
+ * queue.
+ *
+ * prev_pkt would be negative if there was no
+ * pending work.
+ */
+ packets = tx_ring->stats.pkts & INT_MAX;
+ if (tx_ring->tx_stats.prev_pkt == packets) {
+ /* Trigger sw interrupt to revive the queue */
+ ice_trigger_sw_intr(hw, tx_ring->q_vector);
+ continue;
+ }
+
+ /* Memory barrier between read of packet count and call
+ * to ice_get_tx_pending()
+ */
+ smp_rmb();
+ tx_ring->tx_stats.prev_pkt =
+ ice_get_tx_pending(tx_ring) ? packets : -1;
+ }
+ }
+}
+
+/**
+ * ice_init_mac_fltr - Set initial MAC filters
+ * @pf: board private structure
+ *
+ * Set the initial set of MAC filters for the PF VSI; configure filters for
+ * the permanent address and the broadcast address. If an error is
+ * encountered, the netdevice will be unregistered.
+ */
+static int ice_init_mac_fltr(struct ice_pf *pf)
+{
+ struct ice_vsi *vsi;
+ u8 *perm_addr;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return -EINVAL;
+
+ perm_addr = vsi->port_info->mac.perm_addr;
+ return ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
+}
+
+/**
+ * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
+ * @netdev: the net device on which the sync is happening
+ * @addr: MAC address to sync
+ *
+ * This is a callback function which is called by the in-kernel device sync
+ * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
+ * populates the tmp_sync_list, which is later used by ice_add_mac to add the
+ * MAC filters to the hardware.
+ */
+static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr,
+ ICE_FWD_TO_VSI))
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
+ * @netdev: the net device on which the unsync is happening
+ * @addr: MAC address to unsync
+ *
+ * This is a callback function which is called by the in-kernel device unsync
+ * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
+ * populates the tmp_unsync_list, which is later used by ice_remove_mac to
+ * delete the MAC filters from the hardware.
+ */
+static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ /* Under some circumstances, we might receive a request to delete our
+ * own device address from our uc list. Because we store the device
+ * address in the VSI's MAC filter list, we need to ignore such
+ * requests and not delete our device address from this list.
+ */
+ if (ether_addr_equal(addr, netdev->dev_addr))
+ return 0;
+
+ if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
+ ICE_FWD_TO_VSI))
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * ice_vsi_fltr_changed - check if filter state changed
+ * @vsi: VSI to be checked
+ *
+ * returns true if filter state has changed, false otherwise.
+ */
+static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
+{
+ return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
+ test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
+}
+
+/**
+ * ice_set_promisc - Enable promiscuous mode for a given VSI
+ * @vsi: the VSI being configured
+ * @promisc_m: mask of promiscuous config bits
+ */
+static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
+{
+ int status;
+
+ if (vsi->type != ICE_VSI_PF)
+ return 0;
+
+ if (ice_vsi_has_non_zero_vlans(vsi)) {
+ promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
+ status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi,
+ promisc_m);
+ } else {
+ status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+ promisc_m, 0);
+ }
+ if (status && status != -EEXIST)
+ return status;
+
+ netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n",
+ vsi->vsi_num, promisc_m);
+ return 0;
+}
+
+/**
+ * ice_clear_promisc - Disable promiscuous mode for a given VSI
+ * @vsi: the VSI being configured
+ * @promisc_m: mask of promiscuous config bits
+ */
+static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
+{
+ int status;
+
+ if (vsi->type != ICE_VSI_PF)
+ return 0;
+
+ if (ice_vsi_has_non_zero_vlans(vsi)) {
+ promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
+ status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi,
+ promisc_m);
+ } else {
+ status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+ promisc_m, 0);
+ }
+
+ netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n",
+ vsi->vsi_num, promisc_m);
+ return status;
+}
+
+/**
+ * ice_get_devlink_port - Get devlink port from netdev
+ * @netdev: the netdevice structure
+ */
+static struct devlink_port *ice_get_devlink_port(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ if (!ice_is_switchdev_running(pf))
+ return NULL;
+
+ return &pf->devlink_port;
+}
+
+/**
+ * ice_vsi_sync_fltr - Update the VSI filter list to the HW
+ * @vsi: ptr to the VSI
+ *
+ * Push any outstanding VSI filter changes through the AdminQ.
+ */
+static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ struct net_device *netdev = vsi->netdev;
+ bool promisc_forced_on = false;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 changed_flags = 0;
+ int err;
+
+ if (!vsi->netdev)
+ return -EINVAL;
+
+ while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
+ usleep_range(1000, 2000);
+
+ changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
+ vsi->current_netdev_flags = vsi->netdev->flags;
+
+ INIT_LIST_HEAD(&vsi->tmp_sync_list);
+ INIT_LIST_HEAD(&vsi->tmp_unsync_list);
+
+ if (ice_vsi_fltr_changed(vsi)) {
+ clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
+ clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
+
+ /* grab the netdev's addr_list_lock */
+ netif_addr_lock_bh(netdev);
+ __dev_uc_sync(netdev, ice_add_mac_to_sync_list,
+ ice_add_mac_to_unsync_list);
+ __dev_mc_sync(netdev, ice_add_mac_to_sync_list,
+ ice_add_mac_to_unsync_list);
+ /* our temp lists are populated. release lock */
+ netif_addr_unlock_bh(netdev);
+ }
+
+ /* Remove MAC addresses in the unsync list */
+ err = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list);
+ ice_fltr_free_list(dev, &vsi->tmp_unsync_list);
+ if (err) {
+ netdev_err(netdev, "Failed to delete MAC filters\n");
+ /* if we failed because of alloc failures, just bail */
+ if (err == -ENOMEM)
+ goto out;
+ }
+
+ /* Add MAC addresses in the sync list */
+ err = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list);
+ ice_fltr_free_list(dev, &vsi->tmp_sync_list);
+ /* If a filter was added successfully or already exists, don't report
+ * it as an error; continue processing the rest of the function.
+ */
+ if (err && err != -EEXIST) {
+ netdev_err(netdev, "Failed to add MAC filters\n");
+ /* If there is no more space for new umac filters, VSI
+ * should go into promiscuous mode. There should be some
+ * space reserved for promiscuous filters.
+ */
+ if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
+ !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC,
+ vsi->state)) {
+ promisc_forced_on = true;
+ netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
+ vsi->vsi_num);
+ } else {
+ goto out;
+ }
+ }
+ err = 0;
+ /* check for changes in promiscuous modes */
+ if (changed_flags & IFF_ALLMULTI) {
+ if (vsi->current_netdev_flags & IFF_ALLMULTI) {
+ err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS);
+ if (err) {
+ vsi->current_netdev_flags &= ~IFF_ALLMULTI;
+ goto out_promisc;
+ }
+ } else {
+ /* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
+ err = ice_clear_promisc(vsi, ICE_MCAST_PROMISC_BITS);
+ if (err) {
+ vsi->current_netdev_flags |= IFF_ALLMULTI;
+ goto out_promisc;
+ }
+ }
+ }
+
+ if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
+ test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
+ clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
+ if (vsi->current_netdev_flags & IFF_PROMISC) {
+ /* Apply Rx filter rule to get traffic from wire */
+ if (!ice_is_dflt_vsi_in_use(vsi->port_info)) {
+ err = ice_set_dflt_vsi(vsi);
+ if (err && err != -EEXIST) {
+ netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n",
+ err, vsi->vsi_num);
+ vsi->current_netdev_flags &=
+ ~IFF_PROMISC;
+ goto out_promisc;
+ }
+ err = 0;
+ vlan_ops->dis_rx_filtering(vsi);
+
+ /* promiscuous mode implies allmulticast so
+ * that VSIs that are in promiscuous mode are
+ * subscribed to multicast packets coming to
+ * the port
+ */
+ err = ice_set_promisc(vsi,
+ ICE_MCAST_PROMISC_BITS);
+ if (err)
+ goto out_promisc;
+ }
+ } else {
+ /* Clear Rx filter to remove traffic from wire */
+ if (ice_is_vsi_dflt_vsi(vsi)) {
+ err = ice_clear_dflt_vsi(vsi);
+ if (err) {
+ netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
+ err, vsi->vsi_num);
+ vsi->current_netdev_flags |=
+ IFF_PROMISC;
+ goto out_promisc;
+ }
+ if (vsi->netdev->features &
+ NETIF_F_HW_VLAN_CTAG_FILTER)
+ vlan_ops->ena_rx_filtering(vsi);
+ }
+
+ /* disable allmulti here, but only if allmulti is not
+ * still enabled for the netdev
+ */
+ if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
+ err = ice_clear_promisc(vsi,
+ ICE_MCAST_PROMISC_BITS);
+ if (err) {
+ netdev_err(netdev, "Error %d clearing multicast promiscuous on VSI %i\n",
+ err, vsi->vsi_num);
+ }
+ }
+ }
+ }
+ goto exit;
+
+out_promisc:
+ set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
+ goto exit;
+out:
+ /* if something went wrong then set the changed flag so we try again */
+ set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
+ set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
+exit:
+ clear_bit(ICE_CFG_BUSY, vsi->state);
+ return err;
+}
+
+/**
+ * ice_sync_fltr_subtask - Sync the VSI filter list with HW
+ * @pf: board private structure
+ */
+static void ice_sync_fltr_subtask(struct ice_pf *pf)
+{
+ int v;
+
+ if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
+ return;
+
+ clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
+ ice_vsi_sync_fltr(pf->vsi[v])) {
+ /* come back and try again later */
+ set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+ break;
+ }
+}
+
+/**
+ * ice_pf_dis_all_vsi - Pause all VSIs on a PF
+ * @pf: the PF
+ * @locked: is the rtnl_lock already held
+ */
+static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
+{
+ int node;
+ int v;
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v])
+ ice_dis_vsi(pf->vsi[v], locked);
+
+ for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++)
+ pf->pf_agg_node[node].num_vsis = 0;
+
+ for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
+ pf->vf_agg_node[node].num_vsis = 0;
+}
+
+/**
+ * ice_clear_sw_switch_recipes - clear switch recipes
+ * @pf: board private structure
+ *
+ * Mark switch recipes as not created in SW structures. There are cases where
+ * rules (especially advanced rules) need to be restored, either re-read from
+ * hardware or added again, for example after a reset. The 'recp_created' flag
+ * prevents that from happening and needs to be cleared upfront.
+ */
+static void ice_clear_sw_switch_recipes(struct ice_pf *pf)
+{
+ struct ice_sw_recipe *recp;
+ u8 i;
+
+ recp = pf->hw.switch_info->recp_list;
+ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++)
+ recp[i].recp_created = false;
+}
+
+/**
+ * ice_prepare_for_reset - prep for reset
+ * @pf: board private structure
+ * @reset_type: reset type requested
+ *
+ * Inform or close all dependent features in prep for reset.
+ */
+static void
+ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+ struct ice_hw *hw = &pf->hw;
+ struct ice_vsi *vsi;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);
+
+ /* already prepared for reset */
+ if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
+ return;
+
+ ice_unplug_aux_dev(pf);
+
+ /* Notify VFs of impending reset */
+ if (ice_check_sq_alive(hw, &hw->mailboxq))
+ ice_vc_notify_reset(pf);
+
+ /* Disable VFs until reset is completed */
+ mutex_lock(&pf->vfs.table_lock);
+ ice_for_each_vf(pf, bkt, vf)
+ ice_set_vf_state_dis(vf);
+ mutex_unlock(&pf->vfs.table_lock);
+
+ if (ice_is_eswitch_mode_switchdev(pf)) {
+ if (reset_type != ICE_RESET_PFR)
+ ice_clear_sw_switch_recipes(pf);
+ }
+
+ /* release ADQ specific HW and SW resources */
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ goto skip;
+
+ /* to be on the safe side, reset orig_rss_size so that the normal flow
+ * of deciding rss_size can take precedence
+ */
+ vsi->orig_rss_size = 0;
+
+ if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+ if (reset_type == ICE_RESET_PFR) {
+ vsi->old_ena_tc = vsi->all_enatc;
+ vsi->old_numtc = vsi->all_numtc;
+ } else {
+ ice_remove_q_channels(vsi, true);
+
+ /* channel rebuild is not supported for other reset
+ * types, hence reset the related info
+ */
+ vsi->old_ena_tc = 0;
+ vsi->all_enatc = 0;
+ vsi->old_numtc = 0;
+ vsi->all_numtc = 0;
+ vsi->req_txq = 0;
+ vsi->req_rxq = 0;
+ clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
+ memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt));
+ }
+ }
+skip:
+
+ /* clear SW filtering DB */
+ ice_clear_hw_tbls(hw);
+ /* disable the VSIs and their queues that are not already DOWN */
+ ice_pf_dis_all_vsi(pf, false);
+
+ if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+ ice_ptp_prepare_for_reset(pf);
+
+ if (ice_is_feature_supported(pf, ICE_F_GNSS))
+ ice_gnss_exit(pf);
+
+ if (hw->port_info)
+ ice_sched_clear_port(hw->port_info);
+
+ ice_shutdown_all_ctrlq(hw);
+
+ set_bit(ICE_PREPARED_FOR_RESET, pf->state);
+}
+
+/**
+ * ice_do_reset - Initiate one of many types of resets
+ * @pf: board private structure
+ * @reset_type: reset type requested before this function was called.
+ */
+static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+
+ dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
+
+ ice_prepare_for_reset(pf, reset_type);
+
+ /* trigger the reset */
+ if (ice_reset(hw, reset_type)) {
+ dev_err(dev, "reset %d failed\n", reset_type);
+ set_bit(ICE_RESET_FAILED, pf->state);
+ clear_bit(ICE_RESET_OICR_RECV, pf->state);
+ clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
+ clear_bit(ICE_PFR_REQ, pf->state);
+ clear_bit(ICE_CORER_REQ, pf->state);
+ clear_bit(ICE_GLOBR_REQ, pf->state);
+ wake_up(&pf->reset_wait_queue);
+ return;
+ }
+
+ /* PFR is a bit of a special case because it doesn't result in an OICR
+ * interrupt. So for PFR, rebuild after the reset and clear the reset-
+ * associated state bits.
+ */
+ if (reset_type == ICE_RESET_PFR) {
+ pf->pfr_count++;
+ ice_rebuild(pf, reset_type);
+ clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
+ clear_bit(ICE_PFR_REQ, pf->state);
+ wake_up(&pf->reset_wait_queue);
+ ice_reset_all_vfs(pf);
+ }
+}
+
+/**
+ * ice_reset_subtask - Set up for resetting the device and driver
+ * @pf: board private structure
+ */
+static void ice_reset_subtask(struct ice_pf *pf)
+{
+ enum ice_reset_req reset_type = ICE_RESET_INVAL;
+
+ /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
+ * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
+ * of reset is pending and sets bits in pf->state indicating the reset
+ * type and ICE_RESET_OICR_RECV. If the latter bit is set, prepare for
+ * the pending reset if not already prepared: for PF software-initiated
+ * global resets the software should already be prepared, as indicated
+ * by ICE_PREPARED_FOR_RESET, while for global resets initiated by
+ * firmware or by software on other PFs that bit is not set, so prepare
+ * now. Then poll for reset done, rebuild and return.
+ */
+ if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
+ /* Perform the largest reset requested */
+ if (test_and_clear_bit(ICE_CORER_RECV, pf->state))
+ reset_type = ICE_RESET_CORER;
+ if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state))
+ reset_type = ICE_RESET_GLOBR;
+ if (test_and_clear_bit(ICE_EMPR_RECV, pf->state))
+ reset_type = ICE_RESET_EMPR;
+ /* return if no valid reset type requested */
+ if (reset_type == ICE_RESET_INVAL)
+ return;
+ ice_prepare_for_reset(pf, reset_type);
+
+ /* make sure we are ready to rebuild */
+ if (ice_check_reset(&pf->hw)) {
+ set_bit(ICE_RESET_FAILED, pf->state);
+ } else {
+ /* done with reset. start rebuild */
+ pf->hw.reset_ongoing = false;
+ ice_rebuild(pf, reset_type);
+ /* clear bit to resume normal operations, but
+ * ICE_NEEDS_RESTART bit is set in case rebuild failed
+ */
+ clear_bit(ICE_RESET_OICR_RECV, pf->state);
+ clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
+ clear_bit(ICE_PFR_REQ, pf->state);
+ clear_bit(ICE_CORER_REQ, pf->state);
+ clear_bit(ICE_GLOBR_REQ, pf->state);
+ wake_up(&pf->reset_wait_queue);
+ ice_reset_all_vfs(pf);
+ }
+
+ return;
+ }
+
+ /* No pending resets to finish processing. Check for new resets */
+ if (test_bit(ICE_PFR_REQ, pf->state))
+ reset_type = ICE_RESET_PFR;
+ if (test_bit(ICE_CORER_REQ, pf->state))
+ reset_type = ICE_RESET_CORER;
+ if (test_bit(ICE_GLOBR_REQ, pf->state))
+ reset_type = ICE_RESET_GLOBR;
+ /* If no valid reset type requested just return */
+ if (reset_type == ICE_RESET_INVAL)
+ return;
+
+ /* reset if not already down or busy */
+ if (!test_bit(ICE_DOWN, pf->state) &&
+ !test_bit(ICE_CFG_BUSY, pf->state)) {
+ ice_do_reset(pf, reset_type);
+ }
+}
+
+/**
+ * ice_print_topo_conflict - print topology conflict message
+ * @vsi: the VSI whose topology status is being checked
+ */
+static void ice_print_topo_conflict(struct ice_vsi *vsi)
+{
+ switch (vsi->port_info->phy.link_info.topo_media_conflict) {
+ case ICE_AQ_LINK_TOPO_CONFLICT:
+ case ICE_AQ_LINK_MEDIA_CONFLICT:
+ case ICE_AQ_LINK_TOPO_UNREACH_PRT:
+ case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
+ case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
+ netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
+ break;
+ case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
+ if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, vsi->back->flags))
+ netdev_warn(vsi->netdev, "An unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules\n");
+ else
+ netdev_err(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * ice_print_link_msg - print link up or down message
+ * @vsi: the VSI whose link status is being queried
+ * @isup: boolean for if the link is now up or down
+ */
+void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
+{
+ struct ice_aqc_get_phy_caps_data *caps;
+ const char *an_advertised;
+ const char *fec_req;
+ const char *speed;
+ const char *fec;
+ const char *fc;
+ const char *an;
+ int status;
+
+ if (!vsi)
+ return;
+
+ if (vsi->current_isup == isup)
+ return;
+
+ vsi->current_isup = isup;
+
+ if (!isup) {
+ netdev_info(vsi->netdev, "NIC Link is Down\n");
+ return;
+ }
+
+ switch (vsi->port_info->phy.link_info.link_speed) {
+ case ICE_AQ_LINK_SPEED_100GB:
+ speed = "100 G";
+ break;
+ case ICE_AQ_LINK_SPEED_50GB:
+ speed = "50 G";
+ break;
+ case ICE_AQ_LINK_SPEED_40GB:
+ speed = "40 G";
+ break;
+ case ICE_AQ_LINK_SPEED_25GB:
+ speed = "25 G";
+ break;
+ case ICE_AQ_LINK_SPEED_20GB:
+ speed = "20 G";
+ break;
+ case ICE_AQ_LINK_SPEED_10GB:
+ speed = "10 G";
+ break;
+ case ICE_AQ_LINK_SPEED_5GB:
+ speed = "5 G";
+ break;
+ case ICE_AQ_LINK_SPEED_2500MB:
+ speed = "2.5 G";
+ break;
+ case ICE_AQ_LINK_SPEED_1000MB:
+ speed = "1 G";
+ break;
+ case ICE_AQ_LINK_SPEED_100MB:
+ speed = "100 M";
+ break;
+ default:
+ speed = "Unknown ";
+ break;
+ }
+
+ switch (vsi->port_info->fc.current_mode) {
+ case ICE_FC_FULL:
+ fc = "Rx/Tx";
+ break;
+ case ICE_FC_TX_PAUSE:
+ fc = "Tx";
+ break;
+ case ICE_FC_RX_PAUSE:
+ fc = "Rx";
+ break;
+ case ICE_FC_NONE:
+ fc = "None";
+ break;
+ default:
+ fc = "Unknown";
+ break;
+ }
+
+ /* Get FEC mode based on negotiated link info */
+ switch (vsi->port_info->phy.link_info.fec_info) {
+ case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+ case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+ fec = "RS-FEC";
+ break;
+ case ICE_AQ_LINK_25G_KR_FEC_EN:
+ fec = "FC-FEC/BASE-R";
+ break;
+ default:
+ fec = "NONE";
+ break;
+ }
+
+ /* check if autoneg completed; might be false if autoneg is not supported */
+ if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
+ an = "True";
+ else
+ an = "False";
+
+ /* Get FEC mode requested based on PHY caps last SW configuration */
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+ if (!caps) {
+ fec_req = "Unknown";
+ an_advertised = "Unknown";
+ goto done;
+ }
+
+ status = ice_aq_get_phy_caps(vsi->port_info, false,
+ ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
+ if (status)
+ netdev_info(vsi->netdev, "Get phy capability failed.\n");
+
+ an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";
+
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+ fec_req = "RS-FEC";
+ else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+ fec_req = "FC-FEC/BASE-R";
+ else
+ fec_req = "NONE";
+
+ kfree(caps);
+
+done:
+ netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
+ speed, fec_req, fec, an_advertised, an, fc);
+ ice_print_topo_conflict(vsi);
+}
+
+/**
+ * ice_vsi_link_event - update the VSI's netdev
+ * @vsi: the VSI on which the link event occurred
+ * @link_up: whether or not the VSI needs to be set up or down
+ */
+static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
+{
+ if (!vsi)
+ return;
+
+ if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev)
+ return;
+
+ if (vsi->type == ICE_VSI_PF) {
+ if (link_up == netif_carrier_ok(vsi->netdev))
+ return;
+
+ if (link_up) {
+ netif_carrier_on(vsi->netdev);
+ netif_tx_wake_all_queues(vsi->netdev);
+ } else {
+ netif_carrier_off(vsi->netdev);
+ netif_tx_stop_all_queues(vsi->netdev);
+ }
+ }
+}
+
+/**
+ * ice_set_dflt_mib - send a default config MIB to the FW
+ * @pf: private PF struct
+ *
+ * This function sends a default configuration MIB to the FW.
+ *
+ * If this function errors out at any point, the driver is still able to
+ * function. The main impact is that LFC may not operate as expected.
+ * Therefore an error here is logged with a debug message, and the driver
+ * continues on with the rebuild/re-enable.
+ */
+static void ice_set_dflt_mib(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ u8 mib_type, *buf, *lldpmib = NULL;
+ u16 len, typelen, offset = 0;
+ struct ice_lldp_org_tlv *tlv;
+ struct ice_hw *hw = &pf->hw;
+ u32 ouisubtype;
+
+ mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
+ lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
+ if (!lldpmib) {
+ dev_dbg(dev, "%s Failed to allocate MIB memory\n",
+ __func__);
+ return;
+ }
+
+ /* Add ETS CFG TLV */
+ tlv = (struct ice_lldp_org_tlv *)lldpmib;
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_IEEE_ETS_TLV_LEN);
+ tlv->typelen = htons(typelen);
+ ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_IEEE_SUBTYPE_ETS_CFG);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ buf = tlv->tlvinfo;
+ buf[0] = 0;
+
+ /* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
+ * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
+ * Octets 13 - 20 are TSA values - leave as zeros
+ */
+ buf[5] = 0x64;
+ len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+ offset += len + 2;
+ tlv = (struct ice_lldp_org_tlv *)
+ ((char *)tlv + sizeof(tlv->typelen) + len);
+
+ /* Add ETS REC TLV */
+ buf = tlv->tlvinfo;
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_IEEE_SUBTYPE_ETS_REC);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ /* First octet of buf is reserved
+ * Octets 1 - 4 map UP to TC - all UPs map to zero
+ * Octets 5 - 12 are BW values - set TC 0 to 100%.
+ * Octets 13 - 20 are TSA value - leave as zeros
+ */
+ buf[5] = 0x64;
+ offset += len + 2;
+ tlv = (struct ice_lldp_org_tlv *)
+ ((char *)tlv + sizeof(tlv->typelen) + len);
+
+ /* Add PFC CFG TLV */
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_IEEE_PFC_TLV_LEN);
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_IEEE_SUBTYPE_PFC_CFG);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ /* Octet 0 advertises the PFC capability (8 TCs); octet 1, the PFC
+ * enable bitmap, is left as all zeros - PFC disabled
+ */
+ buf[0] = 0x08;
+ len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+ offset += len + 2;
+
+ if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
+ dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);
+
+ kfree(lldpmib);
+}
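+
+/* Worked example of the typelen encoding above (illustrative, assuming the
+ * standard LLDP 7-bit type / 9-bit length split and the driver's ETS TLV
+ * length of 25): for the ETS CFG TLV, type = ICE_TLV_TYPE_ORG (127) and
+ * len = ICE_IEEE_ETS_TLV_LEN (25), so
+ *
+ *	typelen = (127 << ICE_LLDP_TLV_TYPE_S) | 25;
+ *
+ * before the htons() byte swap when stored into tlv->typelen.
+ */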
+
+/**
+ * ice_check_phy_fw_load - check if PHY FW load failed
+ * @pf: pointer to PF struct
+ * @link_cfg_err: bitmap from the link info structure
+ *
+ * check if external PHY FW load failed and print an error message if it did
+ */
+static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err)
+{
+ if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) {
+ clear_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
+ return;
+ }
+
+ if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags))
+ return;
+
+ if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) {
+ dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
+ set_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
+ }
+}
+
+/**
+ * ice_check_module_power
+ * @pf: pointer to PF struct
+ * @link_cfg_err: bitmap from the link info structure
+ *
+ * check module power level returned by a previous call to aq_get_link_info
+ * and print error messages if module power level is not supported
+ */
+static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
+{
+ /* if module power level is supported, clear the flag */
+ if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT |
+ ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) {
+ clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
+ return;
+ }
+
+ /* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the
+ * above block didn't clear this bit, there's nothing to do
+ */
+ if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags))
+ return;
+
+ if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) {
+ dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
+ set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
+ } else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) {
+ dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n");
+ set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
+ }
+}
+
+/**
+ * ice_check_link_cfg_err - check if link configuration failed
+ * @pf: pointer to the PF struct
+ * @link_cfg_err: bitmap from the link info structure
+ *
+ * Check the link_cfg_err bitmap from the link info structure and print an
+ * error message for any link configuration failure it reports
+ */
+static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err)
+{
+ ice_check_module_power(pf, link_cfg_err);
+ ice_check_phy_fw_load(pf, link_cfg_err);
+}
+
+/**
+ * ice_link_event - process the link event
+ * @pf: PF that the link event is associated with
+ * @pi: port_info for the port that the link event is associated with
+ * @link_up: true if the physical link is up and false if it is down
+ * @link_speed: current link speed received from the link event
+ *
+ * Returns 0 on success and negative on failure
+ */
+static int
+ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
+ u16 link_speed)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_phy_info *phy_info;
+ struct ice_vsi *vsi;
+ u16 old_link_speed;
+ bool old_link;
+ int status;
+
+ phy_info = &pi->phy;
+ phy_info->link_info_old = phy_info->link_info;
+
+ old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
+ old_link_speed = phy_info->link_info_old.link_speed;
+
+ /* update the link info structures and re-enable link events,
+ * don't bail on failure since other bookkeeping is still needed
+ */
+ status = ice_update_link_info(pi);
+ if (status)
+ dev_dbg(dev, "Failed to update link status on port %d, err %d aq_err %s\n",
+ pi->lport, status,
+ ice_aq_str(pi->hw->adminq.sq_last_status));
+
+ ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
+
+ /* Check if the link state is up after updating link info, and treat
+ * this event as an UP event since the link is actually UP now.
+ */
+ if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
+ link_up = true;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi || !vsi->port_info)
+ return -EINVAL;
+
+ /* turn off PHY if media was removed */
+ if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
+ !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
+ set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+ ice_set_link(vsi, false);
+ }
+
+ /* if the old link up/down and speed is the same as the new */
+ if (link_up == old_link && link_speed == old_link_speed)
+ return 0;
+
+ if (!ice_is_e810(&pf->hw))
+ ice_ptp_link_change(pf, pf->hw.pf_id, link_up);
+
+ if (ice_is_dcb_active(pf)) {
+ if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+ ice_dcb_rebuild(pf);
+ } else {
+ if (link_up)
+ ice_set_dflt_mib(pf);
+ }
+ ice_vsi_link_event(vsi, link_up);
+ ice_print_link_msg(vsi, link_up);
+
+ ice_vc_notify_link_state(pf);
+
+ return 0;
+}
+
+/**
+ * ice_watchdog_subtask - periodic tasks not using event driven scheduling
+ * @pf: board private structure
+ */
+static void ice_watchdog_subtask(struct ice_pf *pf)
+{
+ int i;
+
+ /* if interface is down do nothing */
+ if (test_bit(ICE_DOWN, pf->state) ||
+ test_bit(ICE_CFG_BUSY, pf->state))
+ return;
+
+ /* make sure we don't do these things too often */
+ if (time_before(jiffies,
+ pf->serv_tmr_prev + pf->serv_tmr_period))
+ return;
+
+ pf->serv_tmr_prev = jiffies;
+
+ /* Update the stats for active netdevs so the network stack
+ * can look at updated numbers whenever it cares to
+ */
+ ice_update_pf_stats(pf);
+ ice_for_each_vsi(pf, i)
+ if (pf->vsi[i] && pf->vsi[i]->netdev)
+ ice_update_vsi_stats(pf->vsi[i]);
+}
+
+/**
+ * ice_init_link_events - enable/initialize link events
+ * @pi: pointer to the port_info instance
+ *
+ * Returns -EIO on failure, 0 on success
+ */
+static int ice_init_link_events(struct ice_port_info *pi)
+{
+ u16 mask;
+
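+ /* a bit set in the event mask suppresses reporting of that event, so
+ * the complement below enables exactly the link up/down, media,
+ * module qualification and PHY FW load failure events the driver
+ * handles
+ */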
+ mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
+ ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL |
+ ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL));
+
+ if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
+ dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
+ pi->lport);
+ return -EIO;
+ }
+
+ if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
+ dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
+ pi->lport);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_handle_link_event - handle link event via ARQ
+ * @pf: PF that the link event is associated with
+ * @event: event structure containing link status info
+ */
+static int
+ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
+{
+ struct ice_aqc_get_link_status_data *link_data;
+ struct ice_port_info *port_info;
+ int status;
+
+ link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
+ port_info = pf->hw.port_info;
+ if (!port_info)
+ return -EINVAL;
+
+ status = ice_link_event(pf, port_info,
+ !!(link_data->link_info & ICE_AQ_LINK_UP),
+ le16_to_cpu(link_data->link_speed));
+ if (status)
+ dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
+ status);
+
+ return status;
+}
+
+enum ice_aq_task_state {
+ ICE_AQ_TASK_WAITING = 0,
+ ICE_AQ_TASK_COMPLETE,
+ ICE_AQ_TASK_CANCELED,
+};
+
+struct ice_aq_task {
+ struct hlist_node entry;
+
+ u16 opcode;
+ struct ice_rq_event_info *event;
+ enum ice_aq_task_state state;
+};
+
+/**
+ * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
+ * @pf: pointer to the PF private structure
+ * @opcode: the opcode to wait for
+ * @timeout: how long to wait, in jiffies
+ * @event: storage for the event info
+ *
+ * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
+ * current thread will be put to sleep until the specified event occurs or
+ * until the given timeout is reached.
+ *
+ * To obtain only the descriptor contents, pass an event without an allocated
+ * msg_buf. If the complete data buffer is desired, allocate the
+ * event->msg_buf with enough space ahead of time.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
+ struct ice_rq_event_info *event)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_aq_task *task;
+ unsigned long start;
+ long ret;
+ int err;
+
+ task = kzalloc(sizeof(*task), GFP_KERNEL);
+ if (!task)
+ return -ENOMEM;
+
+ INIT_HLIST_NODE(&task->entry);
+ task->opcode = opcode;
+ task->event = event;
+ task->state = ICE_AQ_TASK_WAITING;
+
+ spin_lock_bh(&pf->aq_wait_lock);
+ hlist_add_head(&task->entry, &pf->aq_wait_list);
+ spin_unlock_bh(&pf->aq_wait_lock);
+
+ start = jiffies;
+
+ ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state,
+ timeout);
+ switch (task->state) {
+ case ICE_AQ_TASK_WAITING:
+ err = ret < 0 ? ret : -ETIMEDOUT;
+ break;
+ case ICE_AQ_TASK_CANCELED:
+ err = ret < 0 ? ret : -ECANCELED;
+ break;
+ case ICE_AQ_TASK_COMPLETE:
+ err = ret < 0 ? ret : 0;
+ break;
+ default:
+ WARN(1, "Unexpected AdminQ wait task state %u", task->state);
+ err = -EINVAL;
+ break;
+ }
+
+ dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
+ jiffies_to_msecs(jiffies - start),
+ jiffies_to_msecs(timeout),
+ opcode);
+
+ spin_lock_bh(&pf->aq_wait_lock);
+ hlist_del(&task->entry);
+ spin_unlock_bh(&pf->aq_wait_lock);
+ kfree(task);
+
+ return err;
+}
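+
+/* Usage sketch (illustrative): for an AdminQ command whose completion is
+ * reported asynchronously on the ARQ - the firmware update flow waits like
+ * this for NVM write completions - a caller can do:
+ *
+ *	struct ice_rq_event_info event = { };
+ *	int err;
+ *
+ *	err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, HZ, &event);
+ *	if (err)
+ *		return err; // -ETIMEDOUT, -ECANCELED on reset, or -ERESTARTSYS
+ *
+ * Leaving event.msg_buf NULL copies back only the descriptor contents.
+ */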
+
+/**
+ * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
+ * @pf: pointer to the PF private structure
+ * @opcode: the opcode of the event
+ * @event: the event to check
+ *
+ * Loops over the current list of pending threads waiting for an AdminQ event.
+ * For each matching task, copy the contents of the event into the task
+ * structure and wake up the thread.
+ *
+ * If multiple threads wait for the same opcode, they will all be woken up.
+ *
+ * Note that event->msg_buf will only be duplicated if the event has a buffer
+ * with enough space already allocated. Otherwise, only the descriptor and
+ * message length will be copied.
+ */
+static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
+ struct ice_rq_event_info *event)
+{
+ struct ice_rq_event_info *task_ev;
+ struct ice_aq_task *task;
+ bool found = false;
+
+ spin_lock_bh(&pf->aq_wait_lock);
+ hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
+ if (task->state || task->opcode != opcode)
+ continue;
+
+ task_ev = task->event;
+ memcpy(&task_ev->desc, &event->desc, sizeof(event->desc));
+ task_ev->msg_len = event->msg_len;
+
+ /* Only copy the data buffer if a destination was set */
+ if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) {
+ memcpy(task_ev->msg_buf, event->msg_buf,
+ event->buf_len);
+ task_ev->buf_len = event->buf_len;
+ }
+
+ task->state = ICE_AQ_TASK_COMPLETE;
+ found = true;
+ }
+ spin_unlock_bh(&pf->aq_wait_lock);
+
+ if (found)
+ wake_up(&pf->aq_wait_queue);
+}
+
+/**
+ * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
+ * @pf: the PF private structure
+ *
+ * Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
+ * This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
+ */
+static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
+{
+ struct ice_aq_task *task;
+
+ spin_lock_bh(&pf->aq_wait_lock);
+ hlist_for_each_entry(task, &pf->aq_wait_list, entry)
+ task->state = ICE_AQ_TASK_CANCELED;
+ spin_unlock_bh(&pf->aq_wait_lock);
+
+ wake_up(&pf->aq_wait_queue);
+}
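+
+/* Lifecycle of the aq_wait machinery: ice_aq_wait_for_event() queues a task
+ * on pf->aq_wait_list, __ice_clean_ctrlq() completes matching tasks through
+ * ice_aq_check_events(), and the reset path is expected to call
+ * ice_aq_cancel_waiting_tasks() so that sleepers fail fast with -ECANCELED
+ * instead of waiting out their full timeout across a reset.
+ */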
+
+/**
+ * __ice_clean_ctrlq - helper function to clean controlq rings
+ * @pf: ptr to struct ice_pf
+ * @q_type: specific Control queue type
+ *
+ * Returns a nonzero value when the work limit was reached with events still
+ * pending, zero otherwise.
+ */
+static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_rq_event_info event;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_ctl_q_info *cq;
+ u16 pending, i = 0;
+ const char *qtype;
+ u32 oldval, val;
+
+ /* Do not clean control queue if/when PF reset fails */
+ if (test_bit(ICE_RESET_FAILED, pf->state))
+ return 0;
+
+ switch (q_type) {
+ case ICE_CTL_Q_ADMIN:
+ cq = &hw->adminq;
+ qtype = "Admin";
+ break;
+ case ICE_CTL_Q_SB:
+ cq = &hw->sbq;
+ qtype = "Sideband";
+ break;
+ case ICE_CTL_Q_MAILBOX:
+ cq = &hw->mailboxq;
+ qtype = "Mailbox";
+ /* we are going to try to detect a malicious VF, so set the
+ * state to begin detection
+ */
+ hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+ break;
+ default:
+ dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
+ return 0;
+ }
+
+ /* check for error indications - PF_xx_AxQLEN register layout for
+ * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
+ */
+ val = rd32(hw, cq->rq.len);
+ if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
+ PF_FW_ARQLEN_ARQCRIT_M)) {
+ oldval = val;
+ if (val & PF_FW_ARQLEN_ARQVFE_M)
+ dev_dbg(dev, "%s Receive Queue VF Error detected\n",
+ qtype);
+ if (val & PF_FW_ARQLEN_ARQOVFL_M) {
+ dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
+ qtype);
+ }
+ if (val & PF_FW_ARQLEN_ARQCRIT_M)
+ dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
+ qtype);
+ val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
+ PF_FW_ARQLEN_ARQCRIT_M);
+ if (oldval != val)
+ wr32(hw, cq->rq.len, val);
+ }
+
+ val = rd32(hw, cq->sq.len);
+ if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
+ PF_FW_ATQLEN_ATQCRIT_M)) {
+ oldval = val;
+ if (val & PF_FW_ATQLEN_ATQVFE_M)
+ dev_dbg(dev, "%s Send Queue VF Error detected\n",
+ qtype);
+ if (val & PF_FW_ATQLEN_ATQOVFL_M) {
+ dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
+ qtype);
+ }
+ if (val & PF_FW_ATQLEN_ATQCRIT_M)
+ dev_dbg(dev, "%s Send Queue Critical Error detected\n",
+ qtype);
+ val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
+ PF_FW_ATQLEN_ATQCRIT_M);
+ if (oldval != val)
+ wr32(hw, cq->sq.len, val);
+ }
+
+ event.buf_len = cq->rq_buf_size;
+ event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
+ if (!event.msg_buf)
+ return 0;
+
+ do {
+ u16 opcode;
+ int ret;
+
+ ret = ice_clean_rq_elem(hw, cq, &event, &pending);
+ if (ret == -EALREADY)
+ break;
+ if (ret) {
+ dev_err(dev, "%s Receive Queue event error %d\n", qtype,
+ ret);
+ break;
+ }
+
+ opcode = le16_to_cpu(event.desc.opcode);
+
+ /* Notify any thread that might be waiting for this event */
+ ice_aq_check_events(pf, opcode, &event);
+
+ switch (opcode) {
+ case ice_aqc_opc_get_link_status:
+ if (ice_handle_link_event(pf, &event))
+ dev_err(dev, "Could not handle link event\n");
+ break;
+ case ice_aqc_opc_event_lan_overflow:
+ ice_vf_lan_overflow_event(pf, &event);
+ break;
+ case ice_mbx_opc_send_msg_to_pf:
+ if (!ice_is_malicious_vf(pf, &event, i, pending))
+ ice_vc_process_vf_msg(pf, &event);
+ break;
+ case ice_aqc_opc_fw_logging:
+ ice_output_fw_log(hw, &event.desc, event.msg_buf);
+ break;
+ case ice_aqc_opc_lldp_set_mib_change:
+ ice_dcb_process_lldp_set_mib_change(pf, &event);
+ break;
+ default:
+ dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
+ qtype, opcode);
+ break;
+ }
+ } while (pending && (i++ < ICE_DFLT_IRQ_WORK));
+
+ kfree(event.msg_buf);
+
+ return pending && (i == ICE_DFLT_IRQ_WORK);
+}
+
+/**
+ * ice_ctrlq_pending - check if there is a difference between ntc and ntu
+ * @hw: pointer to hardware info
+ * @cq: control queue information
+ *
+ * returns true if there are pending messages in a queue, false if there aren't
+ */
+static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+ u16 ntu;
+
+ ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+ return cq->rq.next_to_clean != ntu;
+}
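+
+/* Example: if next_to_clean is 10 and the head register reports 12, then
+ * descriptors 10 and 11 were written by firmware but not yet processed, so
+ * the queue is reported as pending.
+ */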
+
+/**
+ * ice_clean_adminq_subtask - clean the AdminQ rings
+ * @pf: board private structure
+ */
+static void ice_clean_adminq_subtask(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
+ return;
+
+ if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
+ return;
+
+ clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
+
+ /* There might be a situation where new messages arrive to a control
+ * queue between processing the last message and clearing the
+ * EVENT_PENDING bit. So before exiting, check queue head again (using
+ * ice_ctrlq_pending) and process new messages if any.
+ */
+ if (ice_ctrlq_pending(hw, &hw->adminq))
+ __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_clean_mailboxq_subtask - clean the MailboxQ rings
+ * @pf: board private structure
+ */
+static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
+ return;
+
+ if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
+ return;
+
+ clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
+
+ if (ice_ctrlq_pending(hw, &hw->mailboxq))
+ __ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_clean_sbq_subtask - clean the Sideband Queue rings
+ * @pf: board private structure
+ */
+static void ice_clean_sbq_subtask(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ /* Nothing to do here if sideband queue is not supported */
+ if (!ice_is_sbq_supported(hw)) {
+ clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
+ return;
+ }
+
+ if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state))
+ return;
+
+ if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB))
+ return;
+
+ clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
+
+ if (ice_ctrlq_pending(hw, &hw->sbq))
+ __ice_clean_ctrlq(pf, ICE_CTL_Q_SB);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_service_task_schedule - schedule the service task to wake up
+ * @pf: board private structure
+ *
+ * If not already scheduled, this puts the task into the work queue.
+ */
+void ice_service_task_schedule(struct ice_pf *pf)
+{
+ if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
+ !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) &&
+ !test_bit(ICE_NEEDS_RESTART, pf->state))
+ queue_work(ice_wq, &pf->serv_task);
+}
+
+/**
+ * ice_service_task_complete - finish up the service task
+ * @pf: board private structure
+ */
+static void ice_service_task_complete(struct ice_pf *pf)
+{
+ WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));
+
+ /* force memory (pf->state) to sync before next service task */
+ smp_mb__before_atomic();
+ clear_bit(ICE_SERVICE_SCHED, pf->state);
+}
+
+/**
+ * ice_service_task_stop - stop service task and cancel works
+ * @pf: board private structure
+ *
+ * Return 0 if the ICE_SERVICE_DIS bit was not already set,
+ * 1 otherwise.
+ */
+static int ice_service_task_stop(struct ice_pf *pf)
+{
+ int ret;
+
+ ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state);
+
+ if (pf->serv_tmr.function)
+ del_timer_sync(&pf->serv_tmr);
+ if (pf->serv_task.func)
+ cancel_work_sync(&pf->serv_task);
+
+ clear_bit(ICE_SERVICE_SCHED, pf->state);
+ return ret;
+}
+
+/**
+ * ice_service_task_restart - restart service task and schedule works
+ * @pf: board private structure
+ *
+ * This function is needed for the suspend and resume paths (e.g. the WoL
+ * scenario)
+ */
+static void ice_service_task_restart(struct ice_pf *pf)
+{
+ clear_bit(ICE_SERVICE_DIS, pf->state);
+ ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_service_timer - timer callback to schedule service task
+ * @t: pointer to timer_list
+ */
+static void ice_service_timer(struct timer_list *t)
+{
+ struct ice_pf *pf = from_timer(pf, t, serv_tmr);
+
+ mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
+ ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_handle_mdd_event - handle malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from service task. OICR interrupt handler indicates MDD event.
+ * VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
+ * messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events
+ * disable the queue, the PF can be configured to reset the VF using ethtool
+ * private flag mdd-auto-reset-vf.
+ */
+static void ice_handle_mdd_event(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ struct ice_vf *vf;
+ unsigned int bkt;
+ u32 reg;
+
+ if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) {
+ /* Since the VF MDD event logging is rate limited, check if
+ * there are pending MDD events.
+ */
+ ice_print_vfs_mdd_events(pf);
+ return;
+ }
+
+ /* find what triggered an MDD event */
+ reg = rd32(hw, GL_MDET_TX_PQM);
+ if (reg & GL_MDET_TX_PQM_VALID_M) {
+ u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >>
+ GL_MDET_TX_PQM_PF_NUM_S;
+ u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >>
+ GL_MDET_TX_PQM_VF_NUM_S;
+ u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >>
+ GL_MDET_TX_PQM_MAL_TYPE_S;
+ u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >>
+ GL_MDET_TX_PQM_QNUM_S);
+
+ if (netif_msg_tx_err(pf))
+ dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+ event, queue, pf_num, vf_num);
+ wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
+ }
+
+ reg = rd32(hw, GL_MDET_TX_TCLAN);
+ if (reg & GL_MDET_TX_TCLAN_VALID_M) {
+ u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >>
+ GL_MDET_TX_TCLAN_PF_NUM_S;
+ u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >>
+ GL_MDET_TX_TCLAN_VF_NUM_S;
+ u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >>
+ GL_MDET_TX_TCLAN_MAL_TYPE_S;
+ u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >>
+ GL_MDET_TX_TCLAN_QNUM_S);
+
+ if (netif_msg_tx_err(pf))
+ dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+ event, queue, pf_num, vf_num);
+ wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
+ }
+
+ reg = rd32(hw, GL_MDET_RX);
+ if (reg & GL_MDET_RX_VALID_M) {
+ u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >>
+ GL_MDET_RX_PF_NUM_S;
+ u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >>
+ GL_MDET_RX_VF_NUM_S;
+ u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >>
+ GL_MDET_RX_MAL_TYPE_S;
+ u16 queue = ((reg & GL_MDET_RX_QNUM_M) >>
+ GL_MDET_RX_QNUM_S);
+
+ if (netif_msg_rx_err(pf))
+ dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
+ event, queue, pf_num, vf_num);
+ wr32(hw, GL_MDET_RX, 0xffffffff);
+ }
+
+ /* check to see if this PF caused an MDD event */
+ reg = rd32(hw, PF_MDET_TX_PQM);
+ if (reg & PF_MDET_TX_PQM_VALID_M) {
+ wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
+ if (netif_msg_tx_err(pf))
+ dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
+ }
+
+ reg = rd32(hw, PF_MDET_TX_TCLAN);
+ if (reg & PF_MDET_TX_TCLAN_VALID_M) {
+ wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF);
+ if (netif_msg_tx_err(pf))
+ dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
+ }
+
+ reg = rd32(hw, PF_MDET_RX);
+ if (reg & PF_MDET_RX_VALID_M) {
+ wr32(hw, PF_MDET_RX, 0xFFFF);
+ if (netif_msg_rx_err(pf))
+ dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
+ }
+
+ /* Check to see if one of the VFs caused an MDD event, and then
+ * increment counters and set print pending
+ */
+ mutex_lock(&pf->vfs.table_lock);
+ ice_for_each_vf(pf, bkt, vf) {
+ reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
+ if (reg & VP_MDET_TX_PQM_VALID_M) {
+ wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF);
+ vf->mdd_tx_events.count++;
+ set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
+ if (netif_msg_tx_err(pf))
+ dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
+ vf->vf_id);
+ }
+
+ reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id));
+ if (reg & VP_MDET_TX_TCLAN_VALID_M) {
+ wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), 0xFFFF);
+ vf->mdd_tx_events.count++;
+ set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
+ if (netif_msg_tx_err(pf))
+ dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
+ vf->vf_id);
+ }
+
+ reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id));
+ if (reg & VP_MDET_TX_TDPU_VALID_M) {
+ wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), 0xFFFF);
+ vf->mdd_tx_events.count++;
+ set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
+ if (netif_msg_tx_err(pf))
+ dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
+ vf->vf_id);
+ }
+
+ reg = rd32(hw, VP_MDET_RX(vf->vf_id));
+ if (reg & VP_MDET_RX_VALID_M) {
+ wr32(hw, VP_MDET_RX(vf->vf_id), 0xFFFF);
+ vf->mdd_rx_events.count++;
+ set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
+ if (netif_msg_rx_err(pf))
+ dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
+ vf->vf_id);
+
+ /* Since the queue is disabled on VF Rx MDD events, the
+ * PF can be configured to reset the VF through ethtool
+ * private flag mdd-auto-reset-vf.
+ */
+ if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
+ /* VF MDD event counters will be cleared by
+ * reset, so print the event prior to reset.
+ */
+ ice_print_vf_rx_mdd_event(vf);
+ ice_reset_vf(vf, ICE_VF_RESET_LOCK);
+ }
+ }
+ }
+ mutex_unlock(&pf->vfs.table_lock);
+
+ ice_print_vfs_mdd_events(pf);
+}
+
+/**
+ * ice_force_phys_link_state - Force the physical link state
+ * @vsi: VSI to force the physical link state to up/down
+ * @link_up: true/false indicates to set the physical link to up/down
+ *
+ * Force the physical link state by getting the current PHY capabilities from
+ * hardware and setting the PHY config based on the determined capabilities. If
+ * the link changes, a link event will be triggered because both the Enable
+ * Automatic Link Update and LESM Enable bits are set when setting the PHY
+ * capabilities.
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
+{
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_aqc_set_phy_cfg_data *cfg;
+ struct ice_port_info *pi;
+ struct device *dev;
+ int retcode;
+
+ if (!vsi || !vsi->port_info || !vsi->back)
+ return -EINVAL;
+ if (vsi->type != ICE_VSI_PF)
+ return 0;
+
+ dev = ice_pf_to_dev(vsi->back);
+
+ pi = vsi->port_info;
+
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+ NULL);
+ if (retcode) {
+ dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
+ vsi->vsi_num, retcode);
+ retcode = -EIO;
+ goto out;
+ }
+
+ /* No change in link */
+ if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
+ link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
+ goto out;
+
+ /* Use the current user PHY configuration. The current user PHY
+ * configuration is initialized during probe from PHY capabilities
+ * software mode, and updated on set PHY configuration.
+ */
+ cfg = kmemdup(&pi->phy.curr_user_phy_cfg, sizeof(*cfg), GFP_KERNEL);
+ if (!cfg) {
+ retcode = -ENOMEM;
+ goto out;
+ }
+
+ cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+ if (link_up)
+ cfg->caps |= ICE_AQ_PHY_ENA_LINK;
+ else
+ cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
+
+ retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
+ if (retcode) {
+ dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
+ vsi->vsi_num, retcode);
+ retcode = -EIO;
+ }
+
+ kfree(cfg);
+out:
+ kfree(pcaps);
+ return retcode;
+}
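+
+/* ice_configure_phy() below short-circuits to this function with
+ * link_up == true when the link-down-on-close private flag is set; the
+ * ifdown path is likewise expected to call it with link_up == false to
+ * take the PHY down.
+ */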
+
+/**
+ * ice_init_nvm_phy_type - Initialize the NVM PHY type
+ * @pi: port info structure
+ *
+ * Initialize nvm_phy_type_[low|high] for link lenient mode support
+ */
+static int ice_init_nvm_phy_type(struct ice_port_info *pi)
+{
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_pf *pf = pi->hw->back;
+ int err;
+
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA,
+ pcaps, NULL);
+
+ if (err) {
+ dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
+ goto out;
+ }
+
+ pf->nvm_phy_type_hi = pcaps->phy_type_high;
+ pf->nvm_phy_type_lo = pcaps->phy_type_low;
+
+out:
+ kfree(pcaps);
+ return err;
+}
+
+/**
+ * ice_init_link_dflt_override - Initialize link default override
+ * @pi: port info structure
+ *
+ * Initialize link default override and PHY total port shutdown during probe
+ */
+static void ice_init_link_dflt_override(struct ice_port_info *pi)
+{
+ struct ice_link_default_override_tlv *ldo;
+ struct ice_pf *pf = pi->hw->back;
+
+ ldo = &pf->link_dflt_override;
+ if (ice_get_link_default_override(ldo, pi))
+ return;
+
+ if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
+ return;
+
+ /* Enable Total Port Shutdown (override/replace link-down-on-close
+ * ethtool private flag) for ports with Port Disable bit set.
+ */
+ set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
+ set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
+}
+
+/**
+ * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
+ * @pi: port info structure
+ *
+ * If default override is enabled, initialize the user PHY cfg speed and FEC
+ * settings using the default override mask from the NVM.
+ *
+ * The PHY should only be configured with the default override settings the
+ * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
+ * is used to indicate that the user PHY cfg default override is initialized
+ * and the PHY has not been configured with the default override settings. The
+ * state is set here, and cleared in ice_configure_phy the first time the PHY is
+ * configured.
+ *
+ * This function should be called only if the FW doesn't support default
+ * configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
+ */
+static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
+{
+ struct ice_link_default_override_tlv *ldo;
+ struct ice_aqc_set_phy_cfg_data *cfg;
+ struct ice_phy_info *phy = &pi->phy;
+ struct ice_pf *pf = pi->hw->back;
+
+ ldo = &pf->link_dflt_override;
+
+ /* If link default override is enabled, use it to mask the NVM PHY
+ * capabilities for the speed and FEC default configuration.
+ */
+ cfg = &phy->curr_user_phy_cfg;
+
+ if (ldo->phy_type_low || ldo->phy_type_high) {
+ cfg->phy_type_low = pf->nvm_phy_type_lo &
+ cpu_to_le64(ldo->phy_type_low);
+ cfg->phy_type_high = pf->nvm_phy_type_hi &
+ cpu_to_le64(ldo->phy_type_high);
+ }
+ cfg->link_fec_opt = ldo->fec_options;
+ phy->curr_user_fec_req = ICE_FEC_AUTO;
+
+ set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
+}
+
+/**
+ * ice_init_phy_user_cfg - Initialize the PHY user configuration
+ * @pi: port info structure
+ *
+ * Initialize the current user PHY configuration, speed, FEC, and FC requested
+ * mode to default. The PHY defaults come from the get PHY capabilities
+ * (topology with media) data, so call this only when media is available; an
+ * error is returned otherwise. The PHY initialization completed state is set
+ * here.
+ *
+ * These values are used when setting the PHY configuration, and the user PHY
+ * configuration is updated on each set PHY configuration.
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_init_phy_user_cfg(struct ice_port_info *pi)
+{
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_phy_info *phy = &pi->phy;
+ struct ice_pf *pf = pi->hw->back;
+ int err;
+
+ if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
+ return -EIO;
+
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ if (ice_fw_supports_report_dflt_cfg(pi->hw))
+ err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
+ pcaps, NULL);
+ else
+ err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+ pcaps, NULL);
+ if (err) {
+ dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
+ goto err_out;
+ }
+
+ ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);
+
+ /* check if lenient mode is supported and enabled */
+ if (ice_fw_supports_link_override(pi->hw) &&
+ !(pcaps->module_compliance_enforcement &
+ ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
+ set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
+
+ /* if the FW supports default PHY configuration mode, then the driver
+ * does not have to apply link override settings. If not,
+ * initialize user PHY configuration with link override values
+ */
+ if (!ice_fw_supports_report_dflt_cfg(pi->hw) &&
+ (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
+ ice_init_phy_cfg_dflt_override(pi);
+ goto out;
+ }
+ }
+
+ /* if link default override is not enabled, set user flow control and
+ * FEC settings based on what get_phy_caps returned
+ */
+ phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
+ pcaps->link_fec_options);
+ phy->curr_user_fc_req = ice_caps_to_fc_mode(pcaps->caps);
+
+out:
+ phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
+ set_bit(ICE_PHY_INIT_COMPLETE, pf->state);
+err_out:
+ kfree(pcaps);
+ return err;
+}
+
+/**
+ * ice_configure_phy - configure PHY
+ * @vsi: VSI of PHY
+ *
+ * Set the PHY configuration. If the current PHY configuration is the same as
+ * the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise,
+ * configure the PHY based on the get PHY capabilities (topology with media)
+ * data.
+ */
+static int ice_configure_phy(struct ice_vsi *vsi)
+{
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ struct ice_port_info *pi = vsi->port_info;
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_aqc_set_phy_cfg_data *cfg;
+ struct ice_phy_info *phy = &pi->phy;
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+ /* Ensure we have media as we cannot configure a medialess port */
+ if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
+ return -ENOMEDIUM;
+
+ ice_print_topo_conflict(vsi);
+
+ if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags) &&
+ phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
+ return -EPERM;
+
+ if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))
+ return ice_force_phys_link_state(vsi, true);
+
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ /* Get current PHY config */
+ err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+ NULL);
+ if (err) {
+ dev_err(dev, "Failed to get PHY configuration, VSI %d error %d\n",
+ vsi->vsi_num, err);
+ goto done;
+ }
+
+ /* If PHY enable link is configured and configuration has not changed,
+ * there's nothing to do
+ */
+ if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
+ ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg))
+ goto done;
+
+ /* Use PHY topology as baseline for configuration */
+ memset(pcaps, 0, sizeof(*pcaps));
+ if (ice_fw_supports_report_dflt_cfg(pi->hw))
+ err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
+ pcaps, NULL);
+ else
+ err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+ pcaps, NULL);
+ if (err) {
+ dev_err(dev, "Failed to get PHY caps, VSI %d error %d\n",
+ vsi->vsi_num, err);
+ goto done;
+ }
+
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ ice_copy_phy_caps_to_cfg(pi, pcaps, cfg);
+
+ /* Speed - If default override pending, use curr_user_phy_cfg set in
+ * ice_init_phy_cfg_dflt_override.
+ */
+ if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+ vsi->back->state)) {
+ cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
+ cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
+ } else {
+ u64 phy_low = 0, phy_high = 0;
+
+ ice_update_phy_type(&phy_low, &phy_high,
+ pi->phy.curr_user_speed_req);
+ cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low);
+ cfg->phy_type_high = pcaps->phy_type_high &
+ cpu_to_le64(phy_high);
+ }
+
+ /* Can't provide what was requested; use PHY capabilities */
+ if (!cfg->phy_type_low && !cfg->phy_type_high) {
+ cfg->phy_type_low = pcaps->phy_type_low;
+ cfg->phy_type_high = pcaps->phy_type_high;
+ }
+
+ /* FEC */
+ ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req);
+
+ /* Can't provide what was requested; use PHY capabilities */
+ if (cfg->link_fec_opt !=
+ (cfg->link_fec_opt & pcaps->link_fec_options)) {
+ cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
+ cfg->link_fec_opt = pcaps->link_fec_options;
+ }
+
+ /* Flow Control - always supported; no need to check against
+ * capabilities
+ */
+ ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req);
+
+ /* Enable link and link update */
+ cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
+
+ err = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL);
+ if (err)
+ dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
+ vsi->vsi_num, err);
+
+ kfree(cfg);
+done:
+ kfree(pcaps);
+ return err;
+}
+
+/**
+ * ice_check_media_subtask - Check for media
+ * @pf: pointer to PF struct
+ *
+ * If media is available, then initialize the PHY user configuration if it
+ * has not been initialized yet, and configure the PHY if the interface is up.
+ */
+static void ice_check_media_subtask(struct ice_pf *pf)
+{
+ struct ice_port_info *pi;
+ struct ice_vsi *vsi;
+ int err;
+
+ /* No need to check for media if it's already present */
+ if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags))
+ return;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return;
+
+ /* Refresh link info and check if media is present */
+ pi = vsi->port_info;
+ err = ice_update_link_info(pi);
+ if (err)
+ return;
+
+ ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
+
+ if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
+ if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
+ ice_init_phy_user_cfg(pi);
+
+ /* PHY settings are reset on media insertion, reconfigure
+ * PHY to preserve settings.
+ */
+ if (test_bit(ICE_VSI_DOWN, vsi->state) &&
+ test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
+ return;
+
+ err = ice_configure_phy(vsi);
+ if (!err)
+ clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+
+ /* A Link Status Event will be generated; the event handler
+ * will complete bringing the interface up
+ */
+ }
+}
+
+/**
+ * ice_service_task - manage and run subtasks
+ * @work: pointer to work_struct contained by the PF struct
+ */
+static void ice_service_task(struct work_struct *work)
+{
+ struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
+ unsigned long start_time = jiffies;
+
+ /* subtasks */
+
+ /* process reset requests first */
+ ice_reset_subtask(pf);
+
+ /* bail if a reset/recovery cycle is in progress, the device is
+ * suspended, or a restart is pending
+ */
+ if (ice_is_reset_in_progress(pf->state) ||
+ test_bit(ICE_SUSPENDED, pf->state) ||
+ test_bit(ICE_NEEDS_RESTART, pf->state)) {
+ ice_service_task_complete(pf);
+ return;
+ }
+
+ if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) {
+ struct iidc_event *event;
+
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (event) {
+ set_bit(IIDC_EVENT_CRIT_ERR, event->type);
+ /* report the entire OICR value to AUX driver */
+ swap(event->reg, pf->oicr_err_reg);
+ ice_send_event_to_aux(pf, event);
+ kfree(event);
+ }
+ }
+
+ /* unplug aux dev per request, if an unplug request came in
+ * while processing a plug request, this will handle it
+ */
+ if (test_and_clear_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags))
+ ice_unplug_aux_dev(pf);
+
+ /* Plug aux device per request */
+ if (test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
+ ice_plug_aux_dev(pf);
+
+ if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) {
+ struct iidc_event *event;
+
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (event) {
+ set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
+ ice_send_event_to_aux(pf, event);
+ kfree(event);
+ }
+ }
+
+ ice_clean_adminq_subtask(pf);
+ ice_check_media_subtask(pf);
+ ice_check_for_hang_subtask(pf);
+ ice_sync_fltr_subtask(pf);
+ ice_handle_mdd_event(pf);
+ ice_watchdog_subtask(pf);
+
+ if (ice_is_safe_mode(pf)) {
+ ice_service_task_complete(pf);
+ return;
+ }
+
+ ice_process_vflr_event(pf);
+ ice_clean_mailboxq_subtask(pf);
+ ice_clean_sbq_subtask(pf);
+ ice_sync_arfs_fltrs(pf);
+ ice_flush_fdir_ctx(pf);
+
+ /* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */
+ ice_service_task_complete(pf);
+
+ /* If the tasks have taken longer than one service timer period
+ * or there is more work to be done, reset the service timer to
+ * schedule the service task now.
+ */
+ if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
+ test_bit(ICE_MDD_EVENT_PENDING, pf->state) ||
+ test_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
+ test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
+ test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) ||
+ test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) ||
+ test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
+ mod_timer(&pf->serv_tmr, jiffies);
+}
+
+/**
+ * ice_set_ctrlq_len - helper function to set controlq length
+ * @hw: pointer to the HW instance
+ */
+static void ice_set_ctrlq_len(struct ice_hw *hw)
+{
+ hw->adminq.num_rq_entries = ICE_AQ_LEN;
+ hw->adminq.num_sq_entries = ICE_AQ_LEN;
+ hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
+ hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
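+ /* num_rq_entries is deliberately set to the ARQLEN field mask, i.e.
+ * the largest receive queue depth the register can express, so the
+ * mailbox can absorb bursts of messages from many VFs
+ */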
+ hw->mailboxq.num_rq_entries = PF_MBX_ARQLEN_ARQLEN_M;
+ hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
+ hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
+ hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
+ hw->sbq.num_rq_entries = ICE_SBQ_LEN;
+ hw->sbq.num_sq_entries = ICE_SBQ_LEN;
+ hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN;
+ hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN;
+}
+
+/**
+ * ice_schedule_reset - schedule a reset
+ * @pf: board private structure
+ * @reset: reset being requested
+ */
+int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+
+ /* bail out if earlier reset has failed */
+ if (test_bit(ICE_RESET_FAILED, pf->state)) {
+ dev_dbg(dev, "earlier reset has failed\n");
+ return -EIO;
+ }
+ /* bail if reset/recovery already in progress */
+ if (ice_is_reset_in_progress(pf->state)) {
+ dev_dbg(dev, "Reset already in progress\n");
+ return -EBUSY;
+ }
+
+ switch (reset) {
+ case ICE_RESET_PFR:
+ set_bit(ICE_PFR_REQ, pf->state);
+ break;
+ case ICE_RESET_CORER:
+ set_bit(ICE_CORER_REQ, pf->state);
+ break;
+ case ICE_RESET_GLOBR:
+ set_bit(ICE_GLOBR_REQ, pf->state);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ice_service_task_schedule(pf);
+ return 0;
+}
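+
+/* Illustrative use: a recovery path can request a PF reset with
+ * ice_schedule_reset(pf, ICE_RESET_PFR); the scheduled service task then
+ * notices the request bit and performs the actual reset/rebuild.
+ */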
+
+/**
+ * ice_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ */
+static void
+ice_irq_affinity_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct ice_q_vector *q_vector =
+ container_of(notify, struct ice_q_vector, affinity_notify);
+
+ cpumask_copy(&q_vector->affinity_mask, mask);
+}
+
+/**
+ * ice_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ */
+static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
+
+/**
+ * ice_vsi_ena_irq - Enable IRQ for the given VSI
+ * @vsi: the VSI being configured
+ */
+static int ice_vsi_ena_irq(struct ice_vsi *vsi)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ int i;
+
+ ice_for_each_q_vector(vsi, i)
+ ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
+
+ ice_flush(hw);
+ return 0;
+}
+
+/**
+ * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
+ * @vsi: the VSI being configured
+ * @basename: name for the vector
+ */
+static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
+{
+ int q_vectors = vsi->num_q_vectors;
+ struct ice_pf *pf = vsi->back;
+ int base = vsi->base_vector;
+ struct device *dev;
+ int rx_int_idx = 0;
+ int tx_int_idx = 0;
+ int vector, err;
+ int irq_num;
+
+ dev = ice_pf_to_dev(pf);
+ for (vector = 0; vector < q_vectors; vector++) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[vector];
+
+ irq_num = pf->msix_entries[base + vector].vector;
+
+ if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) {
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-%s-%d", basename, "TxRx", rx_int_idx++);
+ tx_int_idx++;
+ } else if (q_vector->rx.rx_ring) {
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-%s-%d", basename, "rx", rx_int_idx++);
+ } else if (q_vector->tx.tx_ring) {
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-%s-%d", basename, "tx", tx_int_idx++);
+ } else {
+ /* skip this unused q_vector */
+ continue;
+ }
+ if (vsi->type == ICE_VSI_CTRL && vsi->vf)
+ err = devm_request_irq(dev, irq_num, vsi->irq_handler,
+ IRQF_SHARED, q_vector->name,
+ q_vector);
+ else
+ err = devm_request_irq(dev, irq_num, vsi->irq_handler,
+ 0, q_vector->name, q_vector);
+ if (err) {
+ netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
+ err);
+ goto free_q_irqs;
+ }
+
+ /* Register for affinity change notifications. Skipped when
+ * CONFIG_RFS_ACCEL is enabled because ice_set_cpu_rx_rmap() below
+ * installs the rmap's own affinity notifier on these IRQs, and an
+ * IRQ can have only one notifier.
+ */
+ if (!IS_ENABLED(CONFIG_RFS_ACCEL)) {
+ struct irq_affinity_notify *affinity_notify;
+
+ affinity_notify = &q_vector->affinity_notify;
+ affinity_notify->notify = ice_irq_affinity_notify;
+ affinity_notify->release = ice_irq_affinity_release;
+ irq_set_affinity_notifier(irq_num, affinity_notify);
+ }
+
+ /* assign the mask for this irq */
+ irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
+ }
+
+ err = ice_set_cpu_rx_rmap(vsi);
+ if (err) {
+ netdev_err(vsi->netdev, "Failed to setup CPU RMAP on VSI %u: %pe\n",
+ vsi->vsi_num, ERR_PTR(err));
+ goto free_q_irqs;
+ }
+
+ vsi->irqs_ready = true;
+ return 0;
+
+free_q_irqs:
+ while (vector) {
+ vector--;
+ irq_num = pf->msix_entries[base + vector].vector;
+ if (!IS_ENABLED(CONFIG_RFS_ACCEL))
+ irq_set_affinity_notifier(irq_num, NULL);
+ irq_set_affinity_hint(irq_num, NULL);
+ devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]);
+ }
+ return err;
+}
+
+/**
+ * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
+ * @vsi: VSI to setup Tx rings used by XDP
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
+{
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ struct ice_tx_desc *tx_desc;
+ int i, j;
+
+ ice_for_each_xdp_txq(vsi, i) {
+ u16 xdp_q_idx = vsi->alloc_txq + i;
+ struct ice_tx_ring *xdp_ring;
+
+ xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
+
+ if (!xdp_ring)
+ goto free_xdp_rings;
+
+ xdp_ring->q_index = xdp_q_idx;
+ xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
+ xdp_ring->vsi = vsi;
+ xdp_ring->netdev = NULL;
+ xdp_ring->dev = dev;
+ xdp_ring->count = vsi->num_tx_desc;
+ xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1;
+ xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1;
+ WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
+ if (ice_setup_tx_ring(xdp_ring))
+ goto free_xdp_rings;
+ ice_set_ring_xdp(xdp_ring);
+ spin_lock_init(&xdp_ring->tx_lock);
+ for (j = 0; j < xdp_ring->count; j++) {
+ tx_desc = ICE_TX_DESC(xdp_ring, j);
+ tx_desc->cmd_type_offset_bsz = 0;
+ }
+ }
+
+ return 0;
+
+free_xdp_rings:
+ for (; i >= 0; i--)
+ if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ return -ENOMEM;
+}
+
+/**
+ * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to set the bpf prog on
+ * @prog: the bpf prog pointer
+ */
+static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+ struct bpf_prog *old_prog;
+ int i;
+
+ old_prog = xchg(&vsi->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ ice_for_each_rxq(vsi, i)
+ WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+}
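+
+/* xchg() publishes the new program and returns the old one atomically, and
+ * the WRITE_ONCE() pairs with the READ_ONCE() the Rx hot path is expected
+ * to use when fetching ring->xdp_prog, so no locking is required here.
+ */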
+
+/**
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+ * @prog: bpf program that will be assigned to VSI
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ int xdp_rings_rem = vsi->num_xdp_txq;
+ struct ice_pf *pf = vsi->back;
+ struct ice_qs_cfg xdp_qs_cfg = {
+ .qs_mutex = &pf->avail_q_mutex,
+ .pf_map = pf->avail_txqs,
+ .pf_map_size = pf->max_pf_txqs,
+ .q_count = vsi->num_xdp_txq,
+ .scatter_count = ICE_MAX_SCATTER_TXQS,
+ .vsi_map = vsi->txq_map,
+ .vsi_map_offset = vsi->alloc_txq,
+ .mapping_mode = ICE_VSI_MAP_CONTIG
+ };
+ struct device *dev;
+ int i, v_idx;
+ int status;
+
+ dev = ice_pf_to_dev(pf);
+ vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
+ sizeof(*vsi->xdp_rings), GFP_KERNEL);
+ if (!vsi->xdp_rings)
+ return -ENOMEM;
+
+ vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
+ if (__ice_vsi_get_qs(&xdp_qs_cfg))
+ goto err_map_xdp;
+
+ if (static_key_enabled(&ice_xdp_locking_key))
+ netdev_warn(vsi->netdev,
+ "Could not allocate one XDP Tx ring per CPU, XDP_TX/XDP_REDIRECT actions will be slower\n");
+
+ if (ice_xdp_alloc_setup_rings(vsi))
+ goto clear_xdp_rings;
+
+ /* follow the logic from ice_vsi_map_rings_to_vectors */
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ int xdp_rings_per_v, q_id, q_base;
+
+ xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+ vsi->num_q_vectors - v_idx);
+ q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+ struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+ xdp_ring->q_vector = q_vector;
+ xdp_ring->next = q_vector->tx.tx_ring;
+ q_vector->tx.tx_ring = xdp_ring;
+ }
+ xdp_rings_rem -= xdp_rings_per_v;
+ }
+
+ ice_for_each_rxq(vsi, i) {
+ if (static_key_enabled(&ice_xdp_locking_key)) {
+ vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
+ } else {
+ struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
+ struct ice_tx_ring *ring;
+
+ ice_for_each_tx_ring(ring, q_vector->tx) {
+ if (ice_ring_is_xdp(ring)) {
+ vsi->rx_rings[i]->xdp_ring = ring;
+ break;
+ }
+ }
+ }
+ ice_tx_xsk_pool(vsi, i);
+ }
+
+ /* omit the scheduler update if in reset path; XDP queues will be
+ * taken into account at the end of ice_vsi_rebuild, where
+ * ice_cfg_vsi_lan is being called
+ */
+ if (ice_is_reset_in_progress(pf->state))
+ return 0;
+
+ /* tell the Tx scheduler that right now we have
+ * additional queues
+ */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
+
+ status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+ if (status) {
+ dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n",
+ status);
+ goto clear_xdp_rings;
+ }
+
+ /* assign the prog only when it's not already present on VSI;
+ * this flow is a subject of both ethtool -L and ndo_bpf flows;
+ * VSI rebuild that happens under ethtool -L can expose us to
+ * the bpf_prog refcount issues as we would be swapping same
+ * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
+ * on it as it would be treated as an 'old_prog'; for ndo_bpf
+ * this is not harmful as dev_xdp_install bumps the refcount
+ * before calling the op exposed by the driver;
+ */
+ if (!ice_is_xdp_ena_vsi(vsi))
+ ice_vsi_assign_bpf_prog(vsi, prog);
+
+ return 0;
+clear_xdp_rings:
+ ice_for_each_xdp_txq(vsi, i)
+ if (vsi->xdp_rings[i]) {
+ kfree_rcu(vsi->xdp_rings[i], rcu);
+ vsi->xdp_rings[i] = NULL;
+ }
+
+err_map_xdp:
+ mutex_lock(&pf->avail_q_mutex);
+ ice_for_each_xdp_txq(vsi, i) {
+ clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+ vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+ }
+ mutex_unlock(&pf->avail_q_mutex);
+
+ devm_kfree(dev, vsi->xdp_rings);
+ return -ENOMEM;
+}
+
+/**
+ * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
+ * @vsi: VSI to remove XDP rings
+ *
+ * Detach XDP rings from irq vectors, clean up the PF bitmap and free
+ * resources
+ */
+int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ struct ice_pf *pf = vsi->back;
+ int i, v_idx;
+
+ /* q_vectors are freed in the reset path, so there's no point in
+ * detaching rings. If the rebuild was triggered by something other than
+ * a reset, the reset bits in pf->state won't be set, so additionally
+ * check the first q_vector against NULL
+ */
+ if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+ goto free_qmap;
+
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ struct ice_tx_ring *ring;
+
+ ice_for_each_tx_ring(ring, q_vector->tx)
+ if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+ break;
+
+ /* restore the value of last node prior to XDP setup */
+ q_vector->tx.tx_ring = ring;
+ }
+
+free_qmap:
+ mutex_lock(&pf->avail_q_mutex);
+ ice_for_each_xdp_txq(vsi, i) {
+ clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+ vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+ }
+ mutex_unlock(&pf->avail_q_mutex);
+
+ ice_for_each_xdp_txq(vsi, i)
+ if (vsi->xdp_rings[i]) {
+ if (vsi->xdp_rings[i]->desc) {
+ synchronize_rcu();
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ }
+ kfree_rcu(vsi->xdp_rings[i], rcu);
+ vsi->xdp_rings[i] = NULL;
+ }
+
+ devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings);
+ vsi->xdp_rings = NULL;
+
+ if (static_key_enabled(&ice_xdp_locking_key))
+ static_branch_dec(&ice_xdp_locking_key);
+
+ if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+ return 0;
+
+ ice_vsi_assign_bpf_prog(vsi, NULL);
+
+ /* notify Tx scheduler that we destroyed XDP queues and bring
+ * back the old number of child nodes
+ */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ max_txqs[i] = vsi->num_txq;
+
+ /* change number of XDP Tx queues to 0 */
+ vsi->num_xdp_txq = 0;
+
+ return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+}
+
+/**
+ * ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
+ * @vsi: VSI to schedule napi on
+ */
+static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
+{
+ int i;
+
+ ice_for_each_rxq(vsi, i) {
+ struct ice_rx_ring *rx_ring = vsi->rx_rings[i];
+
+ if (rx_ring->xsk_pool)
+ napi_schedule(&rx_ring->q_vector->napi);
+ }
+}
+
+/**
+ * ice_vsi_determine_xdp_res - figure out how many Tx queues XDP can have
+ * @vsi: VSI to determine the count of XDP Tx queues for
+ *
+ * Returns 0 if the available Tx queue count is at least half of the CPU
+ * count, -ENOMEM otherwise
+ */
+int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
+{
+ u16 avail = ice_get_avail_txq_count(vsi->back);
+ u16 cpus = num_possible_cpus();
+
+ if (avail < cpus / 2)
+ return -ENOMEM;
+
+ vsi->num_xdp_txq = min_t(u16, avail, cpus);
+
+ if (vsi->num_xdp_txq < cpus)
+ static_branch_inc(&ice_xdp_locking_key);
+
+ return 0;
+}
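+
+/* Worked example: with 16 possible CPUs and 12 available Tx queues,
+ * 12 >= 16 / 2 passes the check and num_xdp_txq = min(12, 16) = 12. Since
+ * 12 < 16, ice_xdp_locking_key is enabled and XDP Tx rings are shared
+ * between CPUs under each ring's tx_lock.
+ */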
+
+/**
+ * ice_xdp_setup_prog - Add or remove XDP eBPF program
+ * @vsi: VSI to setup XDP for
+ * @prog: XDP program
+ * @extack: netlink extended ack
+ */
+static int
+ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+ bool if_running = netif_running(vsi->netdev);
+ int ret = 0, xdp_ring_err = 0;
+
+ if (frame_size > vsi->rx_buf_len) {
+ NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
+ return -EOPNOTSUPP;
+ }
+
+ /* need to stop netdev while setting up the program for Rx rings */
+ if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
+ ret = ice_down(vsi);
+ if (ret) {
+ NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
+ return ret;
+ }
+ }
+
+ if (!ice_is_xdp_ena_vsi(vsi) && prog) {
+ xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
+ if (xdp_ring_err) {
+ NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
+ } else {
+ xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
+ }
+ /* reallocate Rx queues that are used for zero-copy */
+ xdp_ring_err = ice_realloc_zc_buf(vsi, true);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
+ } else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+ xdp_ring_err = ice_destroy_xdp_rings(vsi);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
+ /* reallocate Rx queues that were used for zero-copy */
+ xdp_ring_err = ice_realloc_zc_buf(vsi, false);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
+ } else {
+ /* safe to call even when prog == vsi->xdp_prog as
+ * dev_xdp_install in net/core/dev.c incremented prog's
+ * refcount so corresponding bpf_prog_put won't cause
+ * underflow
+ */
+ ice_vsi_assign_bpf_prog(vsi, prog);
+ }
+
+ if (if_running)
+ ret = ice_up(vsi);
+
+ if (!ret && prog)
+ ice_vsi_rx_napi_schedule(vsi);
+
+ return (ret || xdp_ring_err) ? -ENOMEM : 0;
+}
+
+/**
+ * ice_xdp_safe_mode - XDP handler for safe mode
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
+ struct netdev_bpf *xdp)
+{
+ NL_SET_ERR_MSG_MOD(xdp->extack,
+ "Please provide working DDP firmware package in order to use XDP\n"
+ "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
+ return -EOPNOTSUPP;
+}
+
+/**
+ * ice_xdp - implements XDP handler
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (vsi->type != ICE_VSI_PF) {
+ NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI");
+ return -EINVAL;
+ }
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+ case XDP_SETUP_XSK_POOL:
+ return ice_xsk_pool_setup(vsi, xdp->xsk.pool,
+ xdp->xsk.queue_id);
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * ice_ena_misc_vector - enable the non-queue interrupts
+ * @pf: board private structure
+ */
+static void ice_ena_misc_vector(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ /* Disable anti-spoof detection interrupt to prevent spurious event
+ * interrupts during a function reset. Anti-spoof functionality is
+ * still supported.
+ */
+ val = rd32(hw, GL_MDCK_TX_TDPU);
+ val |= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M;
+ wr32(hw, GL_MDCK_TX_TDPU, val);
+
+ /* clear things first */
+ wr32(hw, PFINT_OICR_ENA, 0); /* disable all */
+ rd32(hw, PFINT_OICR); /* read to clear */
+
+ val = (PFINT_OICR_ECC_ERR_M |
+ PFINT_OICR_MAL_DETECT_M |
+ PFINT_OICR_GRST_M |
+ PFINT_OICR_PCI_EXCEPTION_M |
+ PFINT_OICR_VFLR_M |
+ PFINT_OICR_HMC_ERR_M |
+ PFINT_OICR_PE_PUSH_M |
+ PFINT_OICR_PE_CRITERR_M);
+
+ wr32(hw, PFINT_OICR_ENA, val);
+
+ /* SW_ITR_IDX = 0, but don't change INTENA */
+ wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
+ GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
+}
+
+/**
+ * ice_misc_intr - misc interrupt handler
+ * @irq: interrupt number
+ * @data: pointer to the PF structure
+ */
+static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
+{
+ struct ice_pf *pf = (struct ice_pf *)data;
+ struct ice_hw *hw = &pf->hw;
+ irqreturn_t ret = IRQ_NONE;
+ struct device *dev;
+ u32 oicr, ena_mask;
+
+ dev = ice_pf_to_dev(pf);
+ set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
+ set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
+ set_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
+
+ oicr = rd32(hw, PFINT_OICR);
+ ena_mask = rd32(hw, PFINT_OICR_ENA);
+
+ if (oicr & PFINT_OICR_SWINT_M) {
+ ena_mask &= ~PFINT_OICR_SWINT_M;
+ pf->sw_int_count++;
+ }
+
+ if (oicr & PFINT_OICR_MAL_DETECT_M) {
+ ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
+ set_bit(ICE_MDD_EVENT_PENDING, pf->state);
+ }
+ if (oicr & PFINT_OICR_VFLR_M) {
+ /* disable any further VFLR event notifications */
+ if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
+ u32 reg = rd32(hw, PFINT_OICR_ENA);
+
+ reg &= ~PFINT_OICR_VFLR_M;
+ wr32(hw, PFINT_OICR_ENA, reg);
+ } else {
+ ena_mask &= ~PFINT_OICR_VFLR_M;
+ set_bit(ICE_VFLR_EVENT_PENDING, pf->state);
+ }
+ }
+
+ if (oicr & PFINT_OICR_GRST_M) {
+ u32 reset;
+
+ /* we have a reset warning */
+ ena_mask &= ~PFINT_OICR_GRST_M;
+ reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
+ GLGEN_RSTAT_RESET_TYPE_S;
+
+ if (reset == ICE_RESET_CORER)
+ pf->corer_count++;
+ else if (reset == ICE_RESET_GLOBR)
+ pf->globr_count++;
+ else if (reset == ICE_RESET_EMPR)
+ pf->empr_count++;
+ else
+ dev_dbg(dev, "Invalid reset type %d\n", reset);
+
+ /* If a reset cycle isn't already in progress, we set a bit in
+ * pf->state so that the service task can start a reset/rebuild.
+ */
+ if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) {
+ if (reset == ICE_RESET_CORER)
+ set_bit(ICE_CORER_RECV, pf->state);
+ else if (reset == ICE_RESET_GLOBR)
+ set_bit(ICE_GLOBR_RECV, pf->state);
+ else
+ set_bit(ICE_EMPR_RECV, pf->state);
+
+ /* There are a couple of different bits at play here.
+ * hw->reset_ongoing indicates whether the hardware is
+ * in reset. This is set to true when a reset interrupt
+ * is received and set back to false after the driver
+ * has determined that the hardware is out of reset.
+ *
+ * ICE_RESET_OICR_RECV in pf->state indicates
+ * that a post reset rebuild is required before the
+ * driver is operational again. This is set above.
+ *
+ * As this is the start of the reset/rebuild cycle, set
+ * both to indicate that.
+ */
+ hw->reset_ongoing = true;
+ }
+ }
+
+ if (oicr & PFINT_OICR_TSYN_TX_M) {
+ ena_mask &= ~PFINT_OICR_TSYN_TX_M;
+ if (!hw->reset_ongoing) {
+ set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread);
+ ret = IRQ_WAKE_THREAD;
+ }
+ }
+
+ if (oicr & PFINT_OICR_TSYN_EVNT_M) {
+ u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+ u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx));
+
+ ena_mask &= ~PFINT_OICR_TSYN_EVNT_M;
+
+ if (hw->func_caps.ts_func_info.src_tmr_owned) {
+ /* Save EVENTs from GLTSYN register */
+ pf->ptp.ext_ts_irq |= gltsyn_stat &
+ (GLTSYN_STAT_EVENT0_M |
+ GLTSYN_STAT_EVENT1_M |
+ GLTSYN_STAT_EVENT2_M);
+
+ set_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread);
+ ret = IRQ_WAKE_THREAD;
+ }
+ }
+
+#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
+ if (oicr & ICE_AUX_CRIT_ERR) {
+ pf->oicr_err_reg |= oicr;
+ set_bit(ICE_AUX_ERR_PENDING, pf->state);
+ ena_mask &= ~ICE_AUX_CRIT_ERR;
+ }
+
+ /* Report any remaining unexpected interrupts */
+ oicr &= ena_mask;
+ if (oicr) {
+ dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
+ /* If a critical error is pending there is no choice but to
+ * reset the device.
+ */
+ if (oicr & (PFINT_OICR_PCI_EXCEPTION_M |
+ PFINT_OICR_ECC_ERR_M)) {
+ set_bit(ICE_PFR_REQ, pf->state);
+ ice_service_task_schedule(pf);
+ }
+ }
+ if (!ret)
+ ret = IRQ_HANDLED;
+
+ ice_service_task_schedule(pf);
+ ice_irq_dynamic_ena(hw, NULL, NULL);
+
+ return ret;
+}
+
+/**
+ * ice_misc_intr_thread_fn - misc interrupt thread function
+ * @irq: interrupt number
+ * @data: pointer to the PF structure
+ */
+static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
+{
+ struct ice_pf *pf = data;
+
+ if (ice_is_reset_in_progress(pf->state))
+ return IRQ_HANDLED;
+
+ if (test_and_clear_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread))
+ ice_ptp_extts_event(pf);
+
+ if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) {
+ while (!ice_ptp_process_ts(pf))
+ usleep_range(50, 100);
+ }
+
+ return IRQ_HANDLED;
+}
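+
+/* The two handlers above follow the usual threaded-IRQ contract: the hard
+ * handler returns IRQ_WAKE_THREAD only after queueing work (a bit in
+ * pf->misc_thread) and the thread function consumes those bits with
+ * test_and_clear_bit(). A minimal sketch of the same pattern, using
+ * hypothetical helpers (my_dev, my_event_pending, my_handle_event) purely
+ * for illustration:
+ *
+ *	static irqreturn_t hard_handler(int irq, void *data)
+ *	{
+ *		struct my_dev *md = data;
+ *
+ *		if (!my_event_pending(md))
+ *			return IRQ_HANDLED;
+ *		set_bit(MY_EVENT, md->flags);
+ *		return IRQ_WAKE_THREAD;
+ *	}
+ *
+ *	static irqreturn_t thread_fn(int irq, void *data)
+ *	{
+ *		struct my_dev *md = data;
+ *
+ *		if (test_and_clear_bit(MY_EVENT, md->flags))
+ *			my_handle_event(md);
+ *		return IRQ_HANDLED;
+ *	}
+ *
+ * thread_fn() runs in process context and so may sleep (as the
+ * usleep_range() in ice_misc_intr_thread_fn() does), which the hard
+ * handler must not.
+ */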
+
+/**
+ * ice_dis_ctrlq_interrupts - disable control queue interrupts
+ * @hw: pointer to HW structure
+ */
+static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
+{
+ /* disable Admin queue Interrupt causes */
+ wr32(hw, PFINT_FW_CTL,
+ rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);
+
+ /* disable Mailbox queue Interrupt causes */
+ wr32(hw, PFINT_MBX_CTL,
+ rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
+
+ wr32(hw, PFINT_SB_CTL,
+ rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M);
+
+ /* disable Control queue Interrupt causes */
+ wr32(hw, PFINT_OICR_CTL,
+ rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_free_irq_msix_misc - Unroll misc vector setup
+ * @pf: board private structure
+ */
+static void ice_free_irq_msix_misc(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ ice_dis_ctrlq_interrupts(hw);
+
+ /* disable OICR interrupt */
+ wr32(hw, PFINT_OICR_ENA, 0);
+ ice_flush(hw);
+
+ if (pf->msix_entries) {
+ synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
+ devm_free_irq(ice_pf_to_dev(pf),
+ pf->msix_entries[pf->oicr_idx].vector, pf);
+ }
+
+ pf->num_avail_sw_msix += 1;
+ ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID);
+}
+
+/**
+ * ice_ena_ctrlq_interrupts - enable control queue interrupts
+ * @hw: pointer to HW structure
+ * @reg_idx: HW vector index to associate the control queue interrupts with
+ */
+static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
+{
+ u32 val;
+
+ val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
+ PFINT_OICR_CTL_CAUSE_ENA_M);
+ wr32(hw, PFINT_OICR_CTL, val);
+
+ /* enable Admin queue Interrupt causes */
+ val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) |
+ PFINT_FW_CTL_CAUSE_ENA_M);
+ wr32(hw, PFINT_FW_CTL, val);
+
+ /* enable Mailbox queue Interrupt causes */
+ val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) |
+ PFINT_MBX_CTL_CAUSE_ENA_M);
+ wr32(hw, PFINT_MBX_CTL, val);
+
+ /* enable Sideband queue Interrupt causes */
+ val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) |
+ PFINT_SB_CTL_CAUSE_ENA_M);
+ wr32(hw, PFINT_SB_CTL, val);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_req_irq_msix_misc - Setup the misc vector to handle non-queue events
+ * @pf: board private structure
+ *
+ * This sets up the handler for MSIX 0, which is used to manage the
+ * non-queue interrupts, e.g. AdminQ and errors. This is not used
+ * when in MSI or Legacy interrupt mode.
+ */
+static int ice_req_irq_msix_misc(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ int oicr_idx, err = 0;
+
+ if (!pf->int_name[0])
+ snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
+ dev_driver_string(dev), dev_name(dev));
+
+ /* Do not request IRQ but do enable OICR interrupt since settings are
+ * lost during reset. Note that this function is called only during
+ * rebuild path and not while reset is in progress.
+ */
+ if (ice_is_reset_in_progress(pf->state))
+ goto skip_req_irq;
+
+ /* reserve one vector in irq_tracker for misc interrupts */
+ oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+ if (oicr_idx < 0)
+ return oicr_idx;
+
+ pf->num_avail_sw_msix -= 1;
+ pf->oicr_idx = (u16)oicr_idx;
+
+ err = devm_request_threaded_irq(dev,
+ pf->msix_entries[pf->oicr_idx].vector,
+ ice_misc_intr, ice_misc_intr_thread_fn,
+ 0, pf->int_name, pf);
+ if (err) {
+ dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n",
+ pf->int_name, err);
+ ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+ pf->num_avail_sw_msix += 1;
+ return err;
+ }
+
+skip_req_irq:
+ ice_ena_misc_vector(pf);
+
+ ice_ena_ctrlq_interrupts(hw, pf->oicr_idx);
+ wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx),
+ ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
+
+ ice_flush(hw);
+ ice_irq_dynamic_ena(hw, NULL, NULL);
+
+ return 0;
+}
+
+/**
+ * ice_napi_add - register NAPI handler for the VSI
+ * @vsi: VSI for which NAPI handler is to be registered
+ *
+ * This function is only called in the driver's load path. Registering the NAPI
+ * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume,
+ * reset/rebuild, etc.)
+ */
+static void ice_napi_add(struct ice_vsi *vsi)
+{
+ int v_idx;
+
+ if (!vsi->netdev)
+ return;
+
+ ice_for_each_q_vector(vsi, v_idx)
+ netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
+ ice_napi_poll);
+}
+
+/**
+ * ice_set_ops - set netdev and ethtool ops for the given netdev
+ * @netdev: netdev instance
+ */
+static void ice_set_ops(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ if (ice_is_safe_mode(pf)) {
+ netdev->netdev_ops = &ice_netdev_safe_mode_ops;
+ ice_set_ethtool_safe_mode_ops(netdev);
+ return;
+ }
+
+ netdev->netdev_ops = &ice_netdev_ops;
+ netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
+ ice_set_ethtool_ops(netdev);
+}
+
+/**
+ * ice_set_netdev_features - set features for the given netdev
+ * @netdev: netdev instance
+ */
+static void ice_set_netdev_features(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ bool is_dvm_ena = ice_is_dvm_ena(&pf->hw);
+ netdev_features_t csumo_features;
+ netdev_features_t vlano_features;
+ netdev_features_t dflt_features;
+ netdev_features_t tso_features;
+
+ if (ice_is_safe_mode(pf)) {
+ /* safe mode */
+ netdev->features = NETIF_F_SG | NETIF_F_HIGHDMA;
+ netdev->hw_features = netdev->features;
+ return;
+ }
+
+ dflt_features = NETIF_F_SG |
+ NETIF_F_HIGHDMA |
+ NETIF_F_NTUPLE |
+ NETIF_F_RXHASH;
+
+ csumo_features = NETIF_F_RXCSUM |
+ NETIF_F_IP_CSUM |
+ NETIF_F_SCTP_CRC |
+ NETIF_F_IPV6_CSUM;
+
+ vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX;
+
+ /* Enable CTAG/STAG filtering by default in Double VLAN Mode (DVM) */
+ if (is_dvm_ena)
+ vlano_features |= NETIF_F_HW_VLAN_STAG_FILTER;
+
+ tso_features = NETIF_F_TSO |
+ NETIF_F_TSO_ECN |
+ NETIF_F_TSO6 |
+ NETIF_F_GSO_GRE |
+ NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_GRE_CSUM |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ NETIF_F_GSO_PARTIAL |
+ NETIF_F_GSO_IPXIP4 |
+ NETIF_F_GSO_IPXIP6 |
+ NETIF_F_GSO_UDP_L4;
+
+ netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ NETIF_F_GSO_GRE_CSUM;
+ /* set features that user can change */
+ netdev->hw_features = dflt_features | csumo_features |
+ vlano_features | tso_features;
+
+ /* add support for HW_CSUM on packets with MPLS header */
+ netdev->mpls_features = NETIF_F_HW_CSUM |
+ NETIF_F_TSO |
+ NETIF_F_TSO6;
+
+ /* enable features */
+ netdev->features |= netdev->hw_features;
+
+ netdev->hw_features |= NETIF_F_HW_TC;
+ netdev->hw_features |= NETIF_F_LOOPBACK;
+
+ /* encap and VLAN devices inherit default, csumo and tso features */
+ netdev->hw_enc_features |= dflt_features | csumo_features |
+ tso_features;
+ netdev->vlan_features |= dflt_features | csumo_features |
+ tso_features;
+
+ /* advertise support but don't enable by default since only one type of
+ * VLAN offload can be enabled at a time (i.e. CTAG or STAG). When one
+ * type is turned on, the other has to be turned off. This is enforced by
+ * the ice_fix_features() ndo callback.
+ */
+ if (is_dvm_ena)
+ netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX |
+ NETIF_F_HW_VLAN_STAG_TX;
+
+ /* Leave CRC / FCS stripping enabled by default, but allow the value to
+ * be changed at runtime
+ */
+ netdev->hw_features |= NETIF_F_RXFCS;
+}
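+
+/* The bits placed in netdev->hw_features above are user-togglable at runtime
+ * through the ethtool feature interface, e.g. (illustrative usage; the
+ * interface name is a placeholder):
+ *
+ *	ethtool -K eth0 rx-fcs on
+ *	ethtool -K eth0 ntuple on
+ *
+ * Such requests arrive via ndo_set_features() after ice_fix_features() has
+ * resolved mutually exclusive combinations such as CTAG vs. STAG offload.
+ */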
+
+/**
+ * ice_cfg_netdev - Allocate and configure a netdev
+ * @vsi: the VSI associated with the new netdev
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_cfg_netdev(struct ice_vsi *vsi)
+{
+ struct ice_netdev_priv *np;
+ struct net_device *netdev;
+ u8 mac_addr[ETH_ALEN];
+
+ netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
+ vsi->alloc_rxq);
+ if (!netdev)
+ return -ENOMEM;
+
+ set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
+ vsi->netdev = netdev;
+ np = netdev_priv(netdev);
+ np->vsi = vsi;
+
+ ice_set_netdev_features(netdev);
+
+ ice_set_ops(netdev);
+
+ if (vsi->type == ICE_VSI_PF) {
+ SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
+ ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+ eth_hw_addr_set(netdev, mac_addr);
+ ether_addr_copy(netdev->perm_addr, mac_addr);
+ }
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
+ /* Setup netdev TC information */
+ ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
+
+ /* set up the watchdog timeout value to be 5 seconds */
+ netdev->watchdog_timeo = 5 * HZ;
+
+ netdev->min_mtu = ETH_MIN_MTU;
+ netdev->max_mtu = ICE_MAX_MTU;
+
+ return 0;
+}
+
+/**
+ * ice_fill_rss_lut - Fill the RSS lookup table with default values
+ * @lut: Lookup table
+ * @rss_table_size: Lookup table size
+ * @rss_size: Range of queue number for hashing
+ */
+void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
+{
+ u16 i;
+
+ for (i = 0; i < rss_table_size; i++)
+ lut[i] = i % rss_size;
+}
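+
+/* Worked example of the fill above: with rss_table_size = 8 and rss_size = 3
+ * the LUT becomes { 0, 1, 2, 0, 1, 2, 0, 1 }, i.e. hash buckets are spread
+ * round-robin across the first three Rx queues.
+ */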
+
+/**
+ * ice_pf_vsi_setup - Set up a PF VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+static struct ice_vsi *
+ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+ return ice_vsi_setup(pf, pi, ICE_VSI_PF, NULL, NULL);
+}
+
+static struct ice_vsi *
+ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
+ struct ice_channel *ch)
+{
+ return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, NULL, ch);
+}
+
+/**
+ * ice_ctrl_vsi_setup - Set up a control VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+static struct ice_vsi *
+ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+ return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, NULL, NULL);
+}
+
+/**
+ * ice_lb_vsi_setup - Set up a loopback VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+struct ice_vsi *
+ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+ return ice_vsi_setup(pf, pi, ICE_VSI_LB, NULL, NULL);
+}
+
+/**
+ * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: VLAN TPID
+ * @vid: VLAN ID to be added
+ *
+ * net_device_ops implementation for adding VLAN IDs
+ */
+static int
+ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi_vlan_ops *vlan_ops;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_vlan vlan;
+ int ret;
+
+ /* VLAN 0 is added by default during load/reset */
+ if (!vid)
+ return 0;
+
+ while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
+ usleep_range(1000, 2000);
+
+ /* Add multicast promisc rule for the VLAN ID to be added if
+ * all-multicast is currently enabled.
+ */
+ if (vsi->current_netdev_flags & IFF_ALLMULTI) {
+ ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+ ICE_MCAST_VLAN_PROMISC_BITS,
+ vid);
+ if (ret)
+ goto finish;
+ }
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+ /* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
+ * packets aren't pruned by the device's internal switch on Rx
+ */
+ vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
+ ret = vlan_ops->add_vlan(vsi, &vlan);
+ if (ret)
+ goto finish;
+
+ /* If all-multicast is currently enabled and this VLAN ID is the only one
+ * besides VLAN-0, we have to update the look-up type of the multicast
+ * promisc rule for VLAN-0 from ICE_SW_LKUP_PROMISC to
+ * ICE_SW_LKUP_PROMISC_VLAN.
+ */
+ if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
+ ice_vsi_num_non_zero_vlans(vsi) == 1) {
+ ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+ ICE_MCAST_PROMISC_BITS, 0);
+ ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+ ICE_MCAST_VLAN_PROMISC_BITS, 0);
+ }
+
+finish:
+ clear_bit(ICE_CFG_BUSY, vsi->state);
+
+ return ret;
+}
+
+/**
+ * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: VLAN TPID
+ * @vid: VLAN ID to be removed
+ *
+ * net_device_ops implementation for removing VLAN IDs
+ */
+static int
+ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi_vlan_ops *vlan_ops;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_vlan vlan;
+ int ret;
+
+ /* don't allow removal of VLAN 0 */
+ if (!vid)
+ return 0;
+
+ while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
+ usleep_range(1000, 2000);
+
+ ret = ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+ ICE_MCAST_VLAN_PROMISC_BITS, vid);
+ if (ret) {
+ netdev_err(netdev, "Error clearing multicast promiscuous mode on VSI %i\n",
+ vsi->vsi_num);
+ vsi->current_netdev_flags |= IFF_ALLMULTI;
+ }
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+ /* Make sure VLAN delete is successful before updating VLAN
+ * information
+ */
+ vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
+ ret = vlan_ops->del_vlan(vsi, &vlan);
+ if (ret)
+ goto finish;
+
+ /* Remove multicast promisc rule for the removed VLAN ID if
+ * all-multicast is enabled.
+ */
+ if (vsi->current_netdev_flags & IFF_ALLMULTI)
+ ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+ ICE_MCAST_VLAN_PROMISC_BITS, vid);
+
+ if (!ice_vsi_has_non_zero_vlans(vsi)) {
+ /* Update look-up type of multicast promisc rule for VLAN 0
+ * from ICE_SW_LKUP_PROMISC_VLAN to ICE_SW_LKUP_PROMISC when
+ * all-multicast is enabled and VLAN 0 is the only VLAN rule.
+ */
+ if (vsi->current_netdev_flags & IFF_ALLMULTI) {
+ ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+ ICE_MCAST_VLAN_PROMISC_BITS,
+ 0);
+ ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+ ICE_MCAST_PROMISC_BITS, 0);
+ }
+ }
+
+finish:
+ clear_bit(ICE_CFG_BUSY, vsi->state);
+
+ return ret;
+}
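+
+/* Both VLAN ndo callbacks above are normally driven by the 8021q layer when
+ * a VLAN upper device is created or destroyed, e.g. (illustrative usage;
+ * names are placeholders):
+ *
+ *	ip link add link eth0 name eth0.100 type vlan id 100
+ *	ip link del eth0.100
+ *
+ * The first command ends up in ice_vlan_rx_add_vid(), the second in
+ * ice_vlan_rx_kill_vid().
+ */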
+
+/**
+ * ice_rep_indr_tc_block_unbind - release an indirect TC block's private data
+ * @cb_priv: indirection block private data
+ */
+static void ice_rep_indr_tc_block_unbind(void *cb_priv)
+{
+ struct ice_indr_block_priv *indr_priv = cb_priv;
+
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+}
+
+/**
+ * ice_tc_indir_block_unregister - Unregister TC indirect block notifications
+ * @vsi: VSI struct which has the netdev
+ */
+static void ice_tc_indir_block_unregister(struct ice_vsi *vsi)
+{
+ struct ice_netdev_priv *np = netdev_priv(vsi->netdev);
+
+ flow_indr_dev_unregister(ice_indr_setup_tc_cb, np,
+ ice_rep_indr_tc_block_unbind);
+}
+
+/**
+ * ice_tc_indir_block_remove - clean indirect TC block notifications
+ * @pf: PF structure
+ */
+static void ice_tc_indir_block_remove(struct ice_pf *pf)
+{
+ struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
+
+ if (!pf_vsi)
+ return;
+
+ ice_tc_indir_block_unregister(pf_vsi);
+}
+
+/**
+ * ice_tc_indir_block_register - Register TC indirect block notifications
+ * @vsi: VSI struct which has the netdev
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_tc_indir_block_register(struct ice_vsi *vsi)
+{
+ struct ice_netdev_priv *np;
+
+ if (!vsi || !vsi->netdev)
+ return -EINVAL;
+
+ np = netdev_priv(vsi->netdev);
+
+ INIT_LIST_HEAD(&np->tc_indr_block_priv_list);
+ return flow_indr_dev_register(ice_indr_setup_tc_cb, np);
+}
+
+/**
+ * ice_setup_pf_sw - Setup the HW switch on startup or after reset
+ * @pf: board private structure
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_setup_pf_sw(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ bool dvm = ice_is_dvm_ena(&pf->hw);
+ struct ice_vsi *vsi;
+ int status;
+
+ if (ice_is_reset_in_progress(pf->state))
+ return -EBUSY;
+
+ status = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
+ if (status)
+ return -EIO;
+
+ vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
+ if (!vsi)
+ return -ENOMEM;
+
+ /* init channel list */
+ INIT_LIST_HEAD(&vsi->ch_list);
+
+ status = ice_cfg_netdev(vsi);
+ if (status)
+ goto unroll_vsi_setup;
+ /* netdev has to be configured before setting frame size */
+ ice_vsi_cfg_frame_size(vsi);
+
+ /* init indirect block notifications */
+ status = ice_tc_indir_block_register(vsi);
+ if (status) {
+ dev_err(dev, "Failed to register netdev notifier\n");
+ goto unroll_cfg_netdev;
+ }
+
+ /* Setup DCB netlink interface */
+ ice_dcbnl_setup(vsi);
+
+ /* registering the NAPI handler requires both the queues and
+ * netdev to be created, which are done in ice_pf_vsi_setup()
+ * and ice_cfg_netdev() respectively
+ */
+ ice_napi_add(vsi);
+
+ status = ice_init_mac_fltr(pf);
+ if (status)
+ goto unroll_napi_add;
+
+ return 0;
+
+unroll_napi_add:
+ ice_tc_indir_block_unregister(vsi);
+unroll_cfg_netdev:
+ if (vsi) {
+ ice_napi_del(vsi);
+ if (vsi->netdev) {
+ clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
+ free_netdev(vsi->netdev);
+ vsi->netdev = NULL;
+ }
+ }
+
+unroll_vsi_setup:
+ ice_vsi_release(vsi);
+ return status;
+}
+
+/**
+ * ice_get_avail_q_count - Get count of available (unused) queues
+ * @pf_qmap: bitmap to get queue use count from
+ * @lock: pointer to a mutex that protects access to pf_qmap
+ * @size: size of the bitmap
+ */
+static u16
+ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size)
+{
+ unsigned long bit;
+ u16 count = 0;
+
+ mutex_lock(lock);
+ for_each_clear_bit(bit, pf_qmap, size)
+ count++;
+ mutex_unlock(lock);
+
+ return count;
+}
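+
+/* Counting clear bits this way is equivalent to
+ * size - bitmap_weight(pf_qmap, size); the open-coded loop keeps the
+ * mutex-protected walk explicit at the call site.
+ */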
+
+/**
+ * ice_get_avail_txq_count - Get count of available Tx queues
+ * @pf: pointer to an ice_pf instance
+ */
+u16 ice_get_avail_txq_count(struct ice_pf *pf)
+{
+ return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex,
+ pf->max_pf_txqs);
+}
+
+/**
+ * ice_get_avail_rxq_count - Get count of available Rx queues
+ * @pf: pointer to an ice_pf instance
+ */
+u16 ice_get_avail_rxq_count(struct ice_pf *pf)
+{
+ return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex,
+ pf->max_pf_rxqs);
+}
+
+/**
+ * ice_deinit_pf - Unrolls initializations done by ice_init_pf
+ * @pf: board private structure to initialize
+ */
+static void ice_deinit_pf(struct ice_pf *pf)
+{
+ ice_service_task_stop(pf);
+ mutex_destroy(&pf->adev_mutex);
+ mutex_destroy(&pf->sw_mutex);
+ mutex_destroy(&pf->tc_mutex);
+ mutex_destroy(&pf->avail_q_mutex);
+ mutex_destroy(&pf->vfs.table_lock);
+
+ if (pf->avail_txqs) {
+ bitmap_free(pf->avail_txqs);
+ pf->avail_txqs = NULL;
+ }
+
+ if (pf->avail_rxqs) {
+ bitmap_free(pf->avail_rxqs);
+ pf->avail_rxqs = NULL;
+ }
+
+ if (pf->ptp.clock)
+ ptp_clock_unregister(pf->ptp.clock);
+}
+
+/**
+ * ice_set_pf_caps - set PF's capability flags
+ * @pf: pointer to the PF instance
+ */
+static void ice_set_pf_caps(struct ice_pf *pf)
+{
+ struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
+
+ clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+ if (func_caps->common_cap.rdma)
+ set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+ clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+ if (func_caps->common_cap.dcb)
+ set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+ if (func_caps->common_cap.sr_iov_1_1) {
+ set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+ pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs,
+ ICE_MAX_SRIOV_VFS);
+ }
+ clear_bit(ICE_FLAG_RSS_ENA, pf->flags);
+ if (func_caps->common_cap.rss_table_size)
+ set_bit(ICE_FLAG_RSS_ENA, pf->flags);
+
+ clear_bit(ICE_FLAG_FD_ENA, pf->flags);
+ if (func_caps->fd_fltr_guar > 0 || func_caps->fd_fltr_best_effort > 0) {
+ u16 unused;
+
+ /* ctrl_vsi_idx will be set to a valid value when flow director
+ * is setup by ice_init_fdir
+ */
+ pf->ctrl_vsi_idx = ICE_NO_VSI;
+ set_bit(ICE_FLAG_FD_ENA, pf->flags);
+ /* force guaranteed filter pool for PF */
+ ice_alloc_fd_guar_item(&pf->hw, &unused,
+ func_caps->fd_fltr_guar);
+ /* force shared filter pool for PF */
+ ice_alloc_fd_shrd_item(&pf->hw, &unused,
+ func_caps->fd_fltr_best_effort);
+ }
+
+ clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
+ if (func_caps->common_cap.ieee_1588)
+ set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
+
+ pf->max_pf_txqs = func_caps->common_cap.num_txq;
+ pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
+}
+
+/**
+ * ice_init_pf - Initialize general software structures (struct ice_pf)
+ * @pf: board private structure to initialize
+ */
+static int ice_init_pf(struct ice_pf *pf)
+{
+ ice_set_pf_caps(pf);
+
+ mutex_init(&pf->sw_mutex);
+ mutex_init(&pf->tc_mutex);
+ mutex_init(&pf->adev_mutex);
+
+ INIT_HLIST_HEAD(&pf->aq_wait_list);
+ spin_lock_init(&pf->aq_wait_lock);
+ init_waitqueue_head(&pf->aq_wait_queue);
+
+ init_waitqueue_head(&pf->reset_wait_queue);
+
+ /* setup service timer and periodic service task */
+ timer_setup(&pf->serv_tmr, ice_service_timer, 0);
+ pf->serv_tmr_period = HZ;
+ INIT_WORK(&pf->serv_task, ice_service_task);
+ clear_bit(ICE_SERVICE_SCHED, pf->state);
+
+ mutex_init(&pf->avail_q_mutex);
+ pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
+ if (!pf->avail_txqs)
+ return -ENOMEM;
+
+ pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
+ if (!pf->avail_rxqs) {
+ bitmap_free(pf->avail_txqs);
+ pf->avail_txqs = NULL;
+ return -ENOMEM;
+ }
+
+ mutex_init(&pf->vfs.table_lock);
+ hash_init(pf->vfs.table);
+
+ return 0;
+}
+
+/**
+ * ice_reduce_msix_usage - Reduce usage of MSI-X vectors
+ * @pf: board private structure
+ * @v_remain: number of remaining MSI-X vectors to be distributed
+ *
+ * Reduce the usage of MSI-X vectors when the entire request cannot be fulfilled.
+ * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of
+ * remaining vectors.
+ */
+static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain)
+{
+ int v_rdma;
+
+ if (!ice_is_rdma_ena(pf)) {
+ pf->num_lan_msix = v_remain;
+ return;
+ }
+
+ /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */
+ v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1;
+
+ if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) {
+ dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n");
+ clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+
+ pf->num_rdma_msix = 0;
+ pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
+ } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) ||
+ (v_remain - v_rdma < v_rdma)) {
+ /* Support minimum RDMA and give remaining vectors to LAN MSIX */
+ pf->num_rdma_msix = ICE_MIN_RDMA_MSIX;
+ pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX;
+ } else {
+ /* Split remaining MSIX with RDMA after accounting for AEQ MSIX */
+ pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 +
+ ICE_RDMA_NUM_AEQ_MSIX;
+ pf->num_lan_msix = v_remain - pf->num_rdma_msix;
+ }
+}
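+
+/* Worked example of the final branch above, assuming ICE_RDMA_NUM_AEQ_MSIX
+ * is 4 (its value in current headers; treat the exact number as an
+ * assumption): with v_remain = 20 we get v_rdma = 5 and neither reduced
+ * branch triggers, so
+ *
+ *	num_rdma_msix = (20 - 4) / 2 + 4 = 12
+ *	num_lan_msix  = 20 - 12 = 8
+ *
+ * i.e. the 16 non-AEQ vectors are split evenly and RDMA keeps its AEQ ones.
+ */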
+
+/**
+ * ice_ena_msix_range - Request a range of MSIX vectors from the OS
+ * @pf: board private structure
+ *
+ * Compute the number of MSIX vectors wanted and request from the OS. Adjust
+ * device usage if there are not enough vectors. Return the number of vectors
+ * reserved or negative on failure.
+ */
+static int ice_ena_msix_range(struct ice_pf *pf)
+{
+ int num_cpus, hw_num_msix, v_other, v_wanted, v_actual;
+ struct device *dev = ice_pf_to_dev(pf);
+ int err, i;
+
+ hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors;
+ num_cpus = num_online_cpus();
+
+ /* LAN miscellaneous handler */
+ v_other = ICE_MIN_LAN_OICR_MSIX;
+
+ /* Flow Director */
+ if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
+ v_other += ICE_FDIR_MSIX;
+
+ /* switchdev */
+ v_other += ICE_ESWITCH_MSIX;
+
+ v_wanted = v_other;
+
+ /* LAN traffic */
+ pf->num_lan_msix = num_cpus;
+ v_wanted += pf->num_lan_msix;
+
+ /* RDMA auxiliary driver */
+ if (ice_is_rdma_ena(pf)) {
+ pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX;
+ v_wanted += pf->num_rdma_msix;
+ }
+
+ if (v_wanted > hw_num_msix) {
+ int v_remain;
+
+ dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n",
+ v_wanted, hw_num_msix);
+
+ if (hw_num_msix < ICE_MIN_MSIX) {
+ err = -ERANGE;
+ goto exit_err;
+ }
+
+ v_remain = hw_num_msix - v_other;
+ if (v_remain < ICE_MIN_LAN_TXRX_MSIX) {
+ v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX;
+ v_remain = ICE_MIN_LAN_TXRX_MSIX;
+ }
+
+ ice_reduce_msix_usage(pf, v_remain);
+ v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other;
+
+ dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n",
+ pf->num_lan_msix);
+ if (ice_is_rdma_ena(pf))
+ dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n",
+ pf->num_rdma_msix);
+ }
+
+ pf->msix_entries = devm_kcalloc(dev, v_wanted,
+ sizeof(*pf->msix_entries), GFP_KERNEL);
+ if (!pf->msix_entries) {
+ err = -ENOMEM;
+ goto exit_err;
+ }
+
+ for (i = 0; i < v_wanted; i++)
+ pf->msix_entries[i].entry = i;
+
+ /* actually reserve the vectors */
+ v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
+ ICE_MIN_MSIX, v_wanted);
+ if (v_actual < 0) {
+ dev_err(dev, "unable to reserve MSI-X vectors\n");
+ err = v_actual;
+ goto msix_err;
+ }
+
+ if (v_actual < v_wanted) {
+ dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
+ v_wanted, v_actual);
+
+ if (v_actual < ICE_MIN_MSIX) {
+ /* error if we can't get minimum vectors */
+ pci_disable_msix(pf->pdev);
+ err = -ERANGE;
+ goto msix_err;
+ } else {
+ int v_remain = v_actual - v_other;
+
+ if (v_remain < ICE_MIN_LAN_TXRX_MSIX)
+ v_remain = ICE_MIN_LAN_TXRX_MSIX;
+
+ ice_reduce_msix_usage(pf, v_remain);
+
+ dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n",
+ pf->num_lan_msix);
+
+ if (ice_is_rdma_ena(pf))
+ dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n",
+ pf->num_rdma_msix);
+ }
+ }
+
+ return v_actual;
+
+msix_err:
+ devm_kfree(dev, pf->msix_entries);
+
+exit_err:
+ pf->num_rdma_msix = 0;
+ pf->num_lan_msix = 0;
+ return err;
+}
+
+/**
+ * ice_dis_msix - Disable MSI-X interrupt setup in OS
+ * @pf: board private structure
+ */
+static void ice_dis_msix(struct ice_pf *pf)
+{
+ pci_disable_msix(pf->pdev);
+ devm_kfree(ice_pf_to_dev(pf), pf->msix_entries);
+ pf->msix_entries = NULL;
+}
+
+/**
+ * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
+ * @pf: board private structure
+ */
+static void ice_clear_interrupt_scheme(struct ice_pf *pf)
+{
+ ice_dis_msix(pf);
+
+ if (pf->irq_tracker) {
+ devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker);
+ pf->irq_tracker = NULL;
+ }
+}
+
+/**
+ * ice_init_interrupt_scheme - Determine proper interrupt scheme
+ * @pf: board private structure to initialize
+ */
+static int ice_init_interrupt_scheme(struct ice_pf *pf)
+{
+ int vectors;
+
+ vectors = ice_ena_msix_range(pf);
+
+ if (vectors < 0)
+ return vectors;
+
+ /* set up vector assignment tracking */
+ pf->irq_tracker = devm_kzalloc(ice_pf_to_dev(pf),
+ struct_size(pf->irq_tracker, list, vectors),
+ GFP_KERNEL);
+ if (!pf->irq_tracker) {
+ ice_dis_msix(pf);
+ return -ENOMEM;
+ }
+
+ /* populate SW interrupts pool with number of OS granted IRQs. */
+ pf->num_avail_sw_msix = (u16)vectors;
+ pf->irq_tracker->num_entries = (u16)vectors;
+ pf->irq_tracker->end = pf->irq_tracker->num_entries;
+
+ return 0;
+}
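+
+/* Note on the allocation above: struct_size(pf->irq_tracker, list, vectors)
+ * expands to sizeof(*pf->irq_tracker) plus vectors times
+ * sizeof(pf->irq_tracker->list[0]), with overflow checking, which is the
+ * idiomatic way to size a structure that ends in a flexible array member.
+ */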
+
+/**
+ * ice_is_wol_supported - check if WoL is supported
+ * @hw: pointer to hardware info
+ *
+ * Check if WoL is supported based on the HW configuration.
+ * Returns true if NVM supports and enables WoL for this port, false otherwise
+ */
+bool ice_is_wol_supported(struct ice_hw *hw)
+{
+ u16 wol_ctrl;
+
+ /* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control
+ * word) indicates WoL is not supported on the corresponding PF ID.
+ */
+ if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl))
+ return false;
+
+ return !(BIT(hw->port_info->lport) & wol_ctrl);
+}
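+
+/* Example of the check above: if the NVM WoL control word reads 0x0004 then
+ * BIT(2) is set, so WoL is reported as unsupported on lport 2 and supported
+ * on all other ports.
+ */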
+
+/**
+ * ice_vsi_recfg_qs - Change the number of queues on a VSI
+ * @vsi: VSI being changed
+ * @new_rx: new number of Rx queues
+ * @new_tx: new number of Tx queues
+ * @locked: is adev device_lock held
+ *
+ * Only change the number of queues if new_tx or new_rx is non-zero.
+ *
+ * Returns 0 on success.
+ */
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
+{
+ struct ice_pf *pf = vsi->back;
+ int err = 0, timeout = 50;
+
+ if (!new_rx && !new_tx)
+ return -EINVAL;
+
+ while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ usleep_range(1000, 2000);
+ }
+
+ if (new_tx)
+ vsi->req_txq = (u16)new_tx;
+ if (new_rx)
+ vsi->req_rxq = (u16)new_rx;
+
+ /* set for the next time the netdev is started */
+ if (!netif_running(vsi->netdev)) {
+ ice_vsi_rebuild(vsi, false);
+ dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
+ goto done;
+ }
+
+ ice_vsi_close(vsi);
+ ice_vsi_rebuild(vsi, false);
+ ice_pf_dcb_recfg(pf, locked);
+ ice_vsi_open(vsi);
+done:
+ clear_bit(ICE_CFG_BUSY, pf->state);
+ return err;
+}
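+
+/* This path is normally reached from the ethtool set_channels callback, e.g.
+ * (illustrative usage; the interface name is a placeholder):
+ *
+ *	ethtool -L eth0 combined 8
+ *
+ * which requests eight combined Tx/Rx queue pairs on the PF VSI.
+ */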
+
+/**
+ * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
+ * @pf: PF to configure
+ *
+ * No VLAN offloads/filtering are advertised in safe mode so make sure the PF
+ * VSI can still Tx/Rx VLAN tagged packets.
+ */
+static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
+{
+ struct ice_vsi *vsi = ice_get_main_vsi(pf);
+ struct ice_vsi_ctx *ctxt;
+ struct ice_hw *hw;
+ int status;
+
+ if (!vsi)
+ return;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return;
+
+ hw = &pf->hw;
+ ctxt->info = vsi->info;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+ ICE_AQ_VSI_PROP_SECURITY_VALID |
+ ICE_AQ_VSI_PROP_SW_VALID);
+
+ /* disable VLAN anti-spoof */
+ ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+
+ /* disable VLAN pruning and keep all other settings */
+ ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+ /* allow all VLANs on Tx and don't strip on Rx */
+ ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL |
+ ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
+
+ status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (status) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %d aq_err %s\n",
+ status, ice_aq_str(hw->adminq.sq_last_status));
+ } else {
+ vsi->info.sec_flags = ctxt->info.sec_flags;
+ vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+ vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
+ }
+
+ kfree(ctxt);
+}
+
+/**
+ * ice_log_pkg_init - log result of DDP package load
+ * @hw: pointer to hardware info
+ * @state: state of package load
+ */
+static void ice_log_pkg_init(struct ice_hw *hw, enum ice_ddp_state state)
+{
+ struct ice_pf *pf = hw->back;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
+
+ switch (state) {
+ case ICE_DDP_PKG_SUCCESS:
+ dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
+ hw->active_pkg_name,
+ hw->active_pkg_ver.major,
+ hw->active_pkg_ver.minor,
+ hw->active_pkg_ver.update,
+ hw->active_pkg_ver.draft);
+ break;
+ case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
+ dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n",
+ hw->active_pkg_name,
+ hw->active_pkg_ver.major,
+ hw->active_pkg_ver.minor,
+ hw->active_pkg_ver.update,
+ hw->active_pkg_ver.draft);
+ break;
+ case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
+ dev_err(dev, "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n",
+ hw->active_pkg_name,
+ hw->active_pkg_ver.major,
+ hw->active_pkg_ver.minor,
+ ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
+ break;
+ case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
+ dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
+ hw->active_pkg_name,
+ hw->active_pkg_ver.major,
+ hw->active_pkg_ver.minor,
+ hw->active_pkg_ver.update,
+ hw->active_pkg_ver.draft,
+ hw->pkg_name,
+ hw->pkg_ver.major,
+ hw->pkg_ver.minor,
+ hw->pkg_ver.update,
+ hw->pkg_ver.draft);
+ break;
+ case ICE_DDP_PKG_FW_MISMATCH:
+ dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package. Please update the device's NVM. Entering safe mode.\n");
+ break;
+ case ICE_DDP_PKG_INVALID_FILE:
+ dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
+ break;
+ case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
+ dev_err(dev, "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n");
+ break;
+ case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
+ dev_err(dev, "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n",
+ ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
+ break;
+ case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
+ dev_err(dev, "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n");
+ break;
+ case ICE_DDP_PKG_FILE_REVISION_TOO_LOW:
+ dev_err(dev, "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n");
+ break;
+ case ICE_DDP_PKG_LOAD_ERROR:
+ dev_err(dev, "An error occurred on the device while loading the DDP package. The device will be reset.\n");
+ /* poll for reset to complete */
+ if (ice_check_reset(hw))
+ dev_err(dev, "Error resetting device. Please reload the driver\n");
+ break;
+ case ICE_DDP_PKG_ERR:
+ default:
+ dev_err(dev, "An unknown error occurred when loading the DDP package. Entering Safe Mode.\n");
+ break;
+ }
+}
+
+/**
+ * ice_load_pkg - load/reload the DDP Package file
+ * @firmware: firmware structure when firmware requested or NULL for reload
+ * @pf: pointer to the PF instance
+ *
+ * Called on probe and post CORER/GLOBR rebuild to load DDP Package and
+ * initialize HW tables.
+ */
+static void
+ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
+{
+ enum ice_ddp_state state = ICE_DDP_PKG_ERR;
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+
+ /* Load DDP Package */
+ if (firmware && !hw->pkg_copy) {
+ state = ice_copy_and_init_pkg(hw, firmware->data,
+ firmware->size);
+ ice_log_pkg_init(hw, state);
+ } else if (!firmware && hw->pkg_copy) {
+ /* Reload package during rebuild after CORER/GLOBR reset */
+ state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
+ ice_log_pkg_init(hw, state);
+ } else {
+ dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n");
+ }
+
+ if (!ice_is_init_pkg_successful(state)) {
+ /* Safe Mode */
+ clear_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+ return;
+ }
+
+ /* Successful download package is the precondition for advanced
+ * features, hence setting the ICE_FLAG_ADV_FEATURES flag
+ */
+ set_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+}
+
+/**
+ * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
+ * @pf: pointer to the PF structure
+ *
+ * There is no error returned here because the driver should be able to handle
+ * 128 Byte cache lines, so we only print a warning in case issues are seen,
+ * specifically with Tx.
+ */
+static void ice_verify_cacheline_size(struct ice_pf *pf)
+{
+ if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
+ dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
+ ICE_CACHE_LINE_BYTES);
+}
+
+/**
+ * ice_send_version - update firmware with driver version
+ * @pf: PF struct
+ *
+ * Returns 0 on success, else error code
+ */
+static int ice_send_version(struct ice_pf *pf)
+{
+ struct ice_driver_ver dv;
+
+ dv.major_ver = 0xff;
+ dv.minor_ver = 0xff;
+ dv.build_ver = 0xff;
+ dv.subbuild_ver = 0;
+ strscpy((char *)dv.driver_string, UTS_RELEASE,
+ sizeof(dv.driver_string));
+ return ice_aq_send_driver_ver(&pf->hw, &dv, NULL);
+}
+
+/**
+ * ice_init_fdir - Initialize flow director VSI and configuration
+ * @pf: pointer to the PF instance
+ *
+ * returns 0 on success, negative on error
+ */
+static int ice_init_fdir(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_vsi *ctrl_vsi;
+ int err;
+
+ /* Side Band Flow Director needs to have a control VSI.
+ * Allocate it and store it in the PF.
+ */
+ ctrl_vsi = ice_ctrl_vsi_setup(pf, pf->hw.port_info);
+ if (!ctrl_vsi) {
+ dev_dbg(dev, "could not create control VSI\n");
+ return -ENOMEM;
+ }
+
+ err = ice_vsi_open_ctrl(ctrl_vsi);
+ if (err) {
+ dev_dbg(dev, "could not open control VSI\n");
+ goto err_vsi_open;
+ }
+
+ mutex_init(&pf->hw.fdir_fltr_lock);
+
+ err = ice_fdir_create_dflt_rules(pf);
+ if (err)
+ goto err_fdir_rule;
+
+ return 0;
+
+err_fdir_rule:
+ ice_fdir_release_flows(&pf->hw);
+ ice_vsi_close(ctrl_vsi);
+err_vsi_open:
+ ice_vsi_release(ctrl_vsi);
+ if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
+ pf->vsi[pf->ctrl_vsi_idx] = NULL;
+ pf->ctrl_vsi_idx = ICE_NO_VSI;
+ }
+ return err;
+}
+
+/**
+ * ice_get_opt_fw_name - return optional firmware file name or NULL
+ * @pf: pointer to the PF instance
+ */
+static char *ice_get_opt_fw_name(struct ice_pf *pf)
+{
+ /* The optional firmware name is the same as the default, with an
+ * additional dash followed by an EUI-64 identifier (PCIe Device Serial
+ * Number)
+ */
+ struct pci_dev *pdev = pf->pdev;
+ char *opt_fw_filename;
+ u64 dsn;
+
+ /* Determine the name of the optional file using the DSN (two
+ * dwords following the start of the DSN Capability).
+ */
+ dsn = pci_get_dsn(pdev);
+ if (!dsn)
+ return NULL;
+
+ opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL);
+ if (!opt_fw_filename)
+ return NULL;
+
+ snprintf(opt_fw_filename, NAME_MAX, "%sice-%016llx.pkg",
+ ICE_DDP_PKG_PATH, dsn);
+
+ return opt_fw_filename;
+}
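+
+/* Example of the name built above: a device whose DSN reads
+ * 0x0123456789abcdef would look for "ice-0123456789abcdef.pkg" under
+ * ICE_DDP_PKG_PATH, i.e. below the firmware search path (for instance
+ * /lib/firmware/intel/ice/ddp/; treat the exact directory as an assumption).
+ */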
+
+/**
+ * ice_request_fw - request and load the DDP package file
+ * @pf: pointer to the PF instance
+ */
+static void ice_request_fw(struct ice_pf *pf)
+{
+ char *opt_fw_filename = ice_get_opt_fw_name(pf);
+ const struct firmware *firmware = NULL;
+ struct device *dev = ice_pf_to_dev(pf);
+ int err = 0;
+
+ /* An optional device-specific DDP package (if present) overrides the
+ * default DDP package file. The kernel logs a debug message if the file
+ * doesn't exist and warning messages for other errors.
+ */
+ if (opt_fw_filename) {
+ err = firmware_request_nowarn(&firmware, opt_fw_filename, dev);
+ if (err) {
+ kfree(opt_fw_filename);
+ goto dflt_pkg_load;
+ }
+
+ /* request for firmware was successful. Download to device */
+ ice_load_pkg(firmware, pf);
+ kfree(opt_fw_filename);
+ release_firmware(firmware);
+ return;
+ }
+
+dflt_pkg_load:
+ err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev);
+ if (err) {
+ dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n");
+ return;
+ }
+
+ /* request for firmware was successful. Download to device */
+ ice_load_pkg(firmware, pf);
+ release_firmware(firmware);
+}
+
+/**
+ * ice_print_wake_reason - show the wake up cause in the log
+ * @pf: pointer to the PF struct
+ */
+static void ice_print_wake_reason(struct ice_pf *pf)
+{
+ u32 wus = pf->wakeup_reason;
+ const char *wake_str;
+
+ /* if no wake event, nothing to print */
+ if (!wus)
+ return;
+
+ if (wus & PFPM_WUS_LNKC_M)
+ wake_str = "Link\n";
+ else if (wus & PFPM_WUS_MAG_M)
+ wake_str = "Magic Packet\n";
+ else if (wus & PFPM_WUS_MNG_M)
+ wake_str = "Management\n";
+ else if (wus & PFPM_WUS_FW_RST_WK_M)
+ wake_str = "Firmware Reset\n";
+ else
+ wake_str = "Unknown\n";
+
+ dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
+}
+
+/**
+ * ice_register_netdev - register netdev and devlink port
+ * @pf: pointer to the PF struct
+ */
+static int ice_register_netdev(struct ice_pf *pf)
+{
+ struct ice_vsi *vsi;
+ int err = 0;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi || !vsi->netdev)
+ return -EIO;
+
+ err = ice_devlink_create_pf_port(pf);
+ if (err)
+ goto err_devlink_create;
+
+ err = register_netdev(vsi->netdev);
+ if (err)
+ goto err_register_netdev;
+
+ set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
+ netif_carrier_off(vsi->netdev);
+ netif_tx_stop_all_queues(vsi->netdev);
+
+ devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev);
+
+ return 0;
+err_register_netdev:
+ ice_devlink_destroy_pf_port(pf);
+err_devlink_create:
+ free_netdev(vsi->netdev);
+ vsi->netdev = NULL;
+ clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
+ return err;
+}
+
+/**
+ * ice_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ice_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
+{
+ struct device *dev = &pdev->dev;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ int i, err;
+
+ if (pdev->is_virtfn) {
+ dev_err(dev, "can't probe a virtual function\n");
+ return -EINVAL;
+ }
+
+ /* when under a kdump kernel initiate a reset before enabling the
+ * device in order to clear out any pending DMA transactions. These
+ * transactions can cause some systems to machine check when doing
+ * the pcim_enable_device() below.
+ */
+ if (is_kdump_kernel()) {
+ pci_save_state(pdev);
+ pci_clear_master(pdev);
+ err = pcie_flr(pdev);
+ if (err)
+ return err;
+ pci_restore_state(pdev);
+ }
+
+ /* this driver uses devres, see
+ * Documentation/driver-api/driver-model/devres.rst
+ */
+ err = pcim_enable_device(pdev);
+ if (err)
+ return err;
+
+ err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev));
+ if (err) {
+ dev_err(dev, "BAR0 I/O map error %d\n", err);
+ return err;
+ }
+
+ pf = ice_allocate_pf(dev);
+ if (!pf)
+ return -ENOMEM;
+
+ /* initialize Auxiliary index to invalid value */
+ pf->aux_idx = -1;
+
+ /* set up for high or low DMA */
+ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(dev, "DMA configuration failed: 0x%x\n", err);
+ return err;
+ }
+
+ pci_set_master(pdev);
+
+ pf->pdev = pdev;
+ pci_set_drvdata(pdev, pf);
+ set_bit(ICE_DOWN, pf->state);
+ /* Disable service task until DOWN bit is cleared */
+ set_bit(ICE_SERVICE_DIS, pf->state);
+
+ hw = &pf->hw;
+ hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
+ pci_save_state(pdev);
+
+ hw->back = pf;
+ hw->vendor_id = pdev->vendor;
+ hw->device_id = pdev->device;
+ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+ hw->subsystem_vendor_id = pdev->subsystem_vendor;
+ hw->subsystem_device_id = pdev->subsystem_device;
+ hw->bus.device = PCI_SLOT(pdev->devfn);
+ hw->bus.func = PCI_FUNC(pdev->devfn);
+ ice_set_ctrlq_len(hw);
+
+ pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+ if (debug < -1)
+ hw->debug_mask = debug;
+#endif
+
+ err = ice_init_hw(hw);
+ if (err) {
+ dev_err(dev, "ice_init_hw failed: %d\n", err);
+ err = -EIO;
+ goto err_exit_unroll;
+ }
+
+ ice_init_feature_support(pf);
+
+ ice_request_fw(pf);
+
+ /* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be
+ * set in pf->flags, which will cause ice_is_safe_mode to return
+ * true
+ */
+ if (ice_is_safe_mode(pf)) {
+ /* we already got function/device capabilities but these don't
+ * reflect what the driver needs to do in safe mode. Instead of
+ * adding conditional logic everywhere to ignore these
+ * device/function capabilities, override them.
+ */
+ ice_set_safe_mode_caps(hw);
+ }
+
+ err = ice_init_pf(pf);
+ if (err) {
+ dev_err(dev, "ice_init_pf failed: %d\n", err);
+ goto err_init_pf_unroll;
+ }
+
+ ice_devlink_init_regions(pf);
+
+ pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
+ pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
+ pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
+ pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
+ i = 0;
+ if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
+ pf->hw.udp_tunnel_nic.tables[i].n_entries =
+ pf->hw.tnl.valid_count[TNL_VXLAN];
+ pf->hw.udp_tunnel_nic.tables[i].tunnel_types =
+ UDP_TUNNEL_TYPE_VXLAN;
+ i++;
+ }
+ if (pf->hw.tnl.valid_count[TNL_GENEVE]) {
+ pf->hw.udp_tunnel_nic.tables[i].n_entries =
+ pf->hw.tnl.valid_count[TNL_GENEVE];
+ pf->hw.udp_tunnel_nic.tables[i].tunnel_types =
+ UDP_TUNNEL_TYPE_GENEVE;
+ i++;
+ }
+
+ pf->num_alloc_vsi = hw->func_caps.guar_num_vsi;
+ if (!pf->num_alloc_vsi) {
+ err = -EIO;
+ goto err_init_pf_unroll;
+ }
+ if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
+ dev_warn(&pf->pdev->dev,
+ "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
+ pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
+ pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
+ }
+
+ pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi),
+ GFP_KERNEL);
+ if (!pf->vsi) {
+ err = -ENOMEM;
+ goto err_init_pf_unroll;
+ }
+
+ err = ice_init_interrupt_scheme(pf);
+ if (err) {
+ dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
+ err = -EIO;
+ goto err_init_vsi_unroll;
+ }
+
+ /* In case of MSI-X we are going to set up the misc vector right here
+ * to handle admin queue events etc. In case of legacy and MSI
+ * the misc functionality and queue processing are combined in
+ * the same vector and that gets set up at open.
+ */
+ err = ice_req_irq_msix_misc(pf);
+ if (err) {
+ dev_err(dev, "setup of misc vector failed: %d\n", err);
+ goto err_init_interrupt_unroll;
+ }
+
+ /* create switch struct for the switch element created by FW on boot */
+ pf->first_sw = devm_kzalloc(dev, sizeof(*pf->first_sw), GFP_KERNEL);
+ if (!pf->first_sw) {
+ err = -ENOMEM;
+ goto err_msix_misc_unroll;
+ }
+
+ if (hw->evb_veb)
+ pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
+ else
+ pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
+
+ pf->first_sw->pf = pf;
+
+ /* record the sw_id available for later use */
+ pf->first_sw->sw_id = hw->port_info->sw_id;
+
+ err = ice_setup_pf_sw(pf);
+ if (err) {
+ dev_err(dev, "probe failed due to setup PF switch: %d\n", err);
+ goto err_alloc_sw_unroll;
+ }
+
+ clear_bit(ICE_SERVICE_DIS, pf->state);
+
+ /* tell the firmware we are up */
+ err = ice_send_version(pf);
+ if (err) {
+ dev_err(dev, "probe failed sending driver version %s. error: %d\n",
+ UTS_RELEASE, err);
+ goto err_send_version_unroll;
+ }
+
+ /* since everything is good, start the service timer */
+ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+
+ err = ice_init_link_events(pf->hw.port_info);
+ if (err) {
+ dev_err(dev, "ice_init_link_events failed: %d\n", err);
+ goto err_send_version_unroll;
+ }
+
+ /* not a fatal error if this fails */
+ err = ice_init_nvm_phy_type(pf->hw.port_info);
+ if (err)
+ dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
+
+ /* not a fatal error if this fails */
+ err = ice_update_link_info(pf->hw.port_info);
+ if (err)
+ dev_err(dev, "ice_update_link_info failed: %d\n", err);
+
+ ice_init_link_dflt_override(pf->hw.port_info);
+
+ ice_check_link_cfg_err(pf,
+ pf->hw.port_info->phy.link_info.link_cfg_err);
+
+ /* if media available, initialize PHY settings */
+ if (pf->hw.port_info->phy.link_info.link_info &
+ ICE_AQ_MEDIA_AVAILABLE) {
+ /* not a fatal error if this fails */
+ err = ice_init_phy_user_cfg(pf->hw.port_info);
+ if (err)
+ dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
+
+ if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
+ struct ice_vsi *vsi = ice_get_main_vsi(pf);
+
+ if (vsi)
+ ice_configure_phy(vsi);
+ }
+ } else {
+ set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+ }
+
+ ice_verify_cacheline_size(pf);
+
+ /* Save wakeup reason register for later use */
+ pf->wakeup_reason = rd32(hw, PFPM_WUS);
+
+ /* check for a power management event */
+ ice_print_wake_reason(pf);
+
+ /* clear wake status, all bits */
+ wr32(hw, PFPM_WUS, U32_MAX);
+
+ /* Disable WoL at init, wait for user to enable */
+ device_set_wakeup_enable(dev, false);
+
+ if (ice_is_safe_mode(pf)) {
+ ice_set_safe_mode_vlan_cfg(pf);
+ goto probe_done;
+ }
+
+ /* initialize DDP driven features */
+ if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+ ice_ptp_init(pf);
+
+ if (ice_is_feature_supported(pf, ICE_F_GNSS))
+ ice_gnss_init(pf);
+
+ /* Note: Flow director init failure is non-fatal to load */
+ if (ice_init_fdir(pf))
+ dev_err(dev, "could not initialize flow director\n");
+
+ /* Note: DCB init failure is non-fatal to load */
+ if (ice_init_pf_dcb(pf, false)) {
+ clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ } else {
+ ice_cfg_lldp_mib_change(&pf->hw, true);
+ }
+
+ if (ice_init_lag(pf))
+ dev_warn(dev, "Failed to init link aggregation support\n");
+
+ /* print PCI link speed and width */
+ pcie_print_link_status(pf->pdev);
+
+probe_done:
+ err = ice_register_netdev(pf);
+ if (err)
+ goto err_netdev_reg;
+
+ err = ice_devlink_register_params(pf);
+ if (err)
+ goto err_netdev_reg;
+
+ /* ready to go, so clear down state bit */
+ clear_bit(ICE_DOWN, pf->state);
+ if (ice_is_rdma_ena(pf)) {
+ pf->aux_idx = ida_alloc(&ice_aux_ida, GFP_KERNEL);
+ if (pf->aux_idx < 0) {
+ dev_err(dev, "Failed to allocate device ID for AUX driver\n");
+ err = -ENOMEM;
+ goto err_devlink_reg_param;
+ }
+
+ err = ice_init_rdma(pf);
+ if (err) {
+ dev_err(dev, "Failed to initialize RDMA: %d\n", err);
+ err = -EIO;
+ goto err_init_aux_unroll;
+ }
+ } else {
+ dev_warn(dev, "RDMA is not supported on this device\n");
+ }
+
+ ice_devlink_register(pf);
+ return 0;
+
+err_init_aux_unroll:
+ pf->adev = NULL;
+ ida_free(&ice_aux_ida, pf->aux_idx);
+err_devlink_reg_param:
+ ice_devlink_unregister_params(pf);
+err_netdev_reg:
+err_send_version_unroll:
+ ice_vsi_release_all(pf);
+err_alloc_sw_unroll:
+ set_bit(ICE_SERVICE_DIS, pf->state);
+ set_bit(ICE_DOWN, pf->state);
+ devm_kfree(dev, pf->first_sw);
+err_msix_misc_unroll:
+ ice_free_irq_msix_misc(pf);
+err_init_interrupt_unroll:
+ ice_clear_interrupt_scheme(pf);
+err_init_vsi_unroll:
+ devm_kfree(dev, pf->vsi);
+err_init_pf_unroll:
+ ice_deinit_pf(pf);
+ ice_devlink_destroy_regions(pf);
+ ice_deinit_hw(hw);
+err_exit_unroll:
+ pci_disable_device(pdev);
+ return err;
+}
+
+/**
+ * ice_set_wake - enable or disable Wake on LAN
+ * @pf: pointer to the PF struct
+ *
+ * Simple helper for WoL control
+ */
+static void ice_set_wake(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ bool wol = pf->wol_ena;
+
+ /* clear wake state, otherwise new wake events won't fire */
+ wr32(hw, PFPM_WUS, U32_MAX);
+
+ /* enable / disable APM wake up, no RMW needed */
+ wr32(hw, PFPM_APM, wol ? PFPM_APM_APME_M : 0);
+
+ /* set magic packet filter enabled */
+ wr32(hw, PFPM_WUFC, wol ? PFPM_WUFC_MAG_M : 0);
+}
+
+/**
+ * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
+ * @pf: pointer to the PF struct
+ *
+ * Issue firmware command to enable multicast magic wake, making
+ * sure that any locally administered address (LAA) is used for
+ * wake, and that PF reset doesn't undo the LAA.
+ */
+static void ice_setup_mc_magic_wake(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ u8 mac_addr[ETH_ALEN];
+ struct ice_vsi *vsi;
+ int status;
+ u8 flags;
+
+ if (!pf->wol_ena)
+ return;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return;
+
+ /* Get current MAC address in case it's an LAA */
+ if (vsi->netdev)
+ ether_addr_copy(mac_addr, vsi->netdev->dev_addr);
+ else
+ ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+
+ flags = ICE_AQC_MAN_MAC_WR_MC_MAG_EN |
+ ICE_AQC_MAN_MAC_UPDATE_LAA_WOL |
+ ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP;
+
+ status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL);
+ if (status)
+ dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %d aq_err %s\n",
+ status, ice_aq_str(hw->adminq.sq_last_status));
+}
+
+/**
+ * ice_remove - Device removal routine
+ * @pdev: PCI device information struct
+ */
+static void ice_remove(struct pci_dev *pdev)
+{
+ struct ice_pf *pf = pci_get_drvdata(pdev);
+ int i;
+
+ ice_devlink_unregister(pf);
+ for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
+ if (!ice_is_reset_in_progress(pf->state))
+ break;
+ msleep(100);
+ }
+
+ ice_tc_indir_block_remove(pf);
+
+ if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
+ set_bit(ICE_VF_RESETS_DISABLED, pf->state);
+ ice_free_vfs(pf);
+ }
+
+ ice_service_task_stop(pf);
+
+ ice_aq_cancel_waiting_tasks(pf);
+ ice_unplug_aux_dev(pf);
+ if (pf->aux_idx >= 0)
+ ida_free(&ice_aux_ida, pf->aux_idx);
+ ice_devlink_unregister_params(pf);
+ set_bit(ICE_DOWN, pf->state);
+
+ ice_deinit_lag(pf);
+ if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+ ice_ptp_release(pf);
+ if (ice_is_feature_supported(pf, ICE_F_GNSS))
+ ice_gnss_exit(pf);
+ if (!ice_is_safe_mode(pf))
+ ice_remove_arfs(pf);
+ ice_setup_mc_magic_wake(pf);
+ ice_vsi_release_all(pf);
+ mutex_destroy(&pf->hw.fdir_fltr_lock);
+ ice_set_wake(pf);
+ ice_free_irq_msix_misc(pf);
+ ice_for_each_vsi(pf, i) {
+ if (!pf->vsi[i])
+ continue;
+ ice_vsi_free_q_vectors(pf->vsi[i]);
+ }
+ ice_deinit_pf(pf);
+ ice_devlink_destroy_regions(pf);
+ ice_deinit_hw(&pf->hw);
+
+ /* Issue a PFR as part of the prescribed driver unload flow. Do not
+ * do it via ice_schedule_reset() since there is no need to rebuild
+ * and the service task is already stopped.
+ */
+ ice_reset(&pf->hw, ICE_RESET_PFR);
+ pci_wait_for_pending_transaction(pdev);
+ ice_clear_interrupt_scheme(pf);
+ pci_disable_device(pdev);
+}
+
+/**
+ * ice_shutdown - PCI callback for shutting down device
+ * @pdev: PCI device information struct
+ */
+static void ice_shutdown(struct pci_dev *pdev)
+{
+ struct ice_pf *pf = pci_get_drvdata(pdev);
+
+ ice_remove(pdev);
+
+ if (system_state == SYSTEM_POWER_OFF) {
+ pci_wake_from_d3(pdev, pf->wol_ena);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+}
+
+#ifdef CONFIG_PM
+/**
+ * ice_prepare_for_shutdown - prep for PCI shutdown
+ * @pf: board private structure
+ *
+ * Inform or close all dependent features in prep for PCI device shutdown
+ */
+static void ice_prepare_for_shutdown(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ u32 v;
+
+ /* Notify VFs of impending reset */
+ if (ice_check_sq_alive(hw, &hw->mailboxq))
+ ice_vc_notify_reset(pf);
+
+ dev_dbg(ice_pf_to_dev(pf), "Tearing down internal switch for shutdown\n");
+
+ /* disable the VSIs and their queues that are not already DOWN */
+ ice_pf_dis_all_vsi(pf, false);
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v])
+ pf->vsi[v]->vsi_num = 0;
+
+ ice_shutdown_all_ctrlq(hw);
+}
+
+/**
+ * ice_reinit_interrupt_scheme - Reinitialize interrupt scheme
+ * @pf: board private structure to reinitialize
+ *
+ * This routine reinitializes the interrupt scheme that was cleared during
+ * the power management suspend callback.
+ *
+ * This should be called during resume routine to re-allocate the q_vectors
+ * and reacquire interrupts.
+ */
+static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ int ret, v;
+
+ /* Since we cleared the MSI-X flag during suspend, we need to
+ * set it back during resume.
+ */
+
+ ret = ice_init_interrupt_scheme(pf);
+ if (ret) {
+ dev_err(dev, "Failed to re-initialize interrupt %d\n", ret);
+ return ret;
+ }
+
+ /* Remap vectors and rings, after successful re-init interrupts */
+ ice_for_each_vsi(pf, v) {
+ if (!pf->vsi[v])
+ continue;
+
+ ret = ice_vsi_alloc_q_vectors(pf->vsi[v]);
+ if (ret)
+ goto err_reinit;
+ ice_vsi_map_rings_to_vectors(pf->vsi[v]);
+ }
+
+ ret = ice_req_irq_msix_misc(pf);
+ if (ret) {
+ dev_err(dev, "Setting up misc vector failed after device suspend %d\n",
+ ret);
+ goto err_reinit;
+ }
+
+ return 0;
+
+err_reinit:
+ while (v--)
+ if (pf->vsi[v])
+ ice_vsi_free_q_vectors(pf->vsi[v]);
+
+ return ret;
+}
+
+/**
+ * ice_suspend - PM suspend callback
+ * @dev: generic device information structure
+ *
+ * Power Management callback to quiesce the device and prepare
+ * for D3 transition.
+ */
+static int __maybe_unused ice_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct ice_pf *pf;
+ int disabled, v;
+
+ pf = pci_get_drvdata(pdev);
+
+ if (!ice_pf_state_is_nominal(pf)) {
+ dev_err(dev, "Device is not ready, no need to suspend it\n");
+ return -EBUSY;
+ }
+
+ /* Stop watchdog tasks until resume completion.
+ * Even though it is most likely that the service task is
+ * disabled if the device is suspended or down, the service task's
+ * state is controlled by a different state bit, and we should
+ * store and honor whatever state that bit is in at this point.
+ */
+ disabled = ice_service_task_stop(pf);
+
+ ice_unplug_aux_dev(pf);
+
+ /* Already suspended? Then there is nothing to do */
+ if (test_and_set_bit(ICE_SUSPENDED, pf->state)) {
+ if (!disabled)
+ ice_service_task_restart(pf);
+ return 0;
+ }
+
+ if (test_bit(ICE_DOWN, pf->state) ||
+ ice_is_reset_in_progress(pf->state)) {
+ dev_err(dev, "can't suspend device in reset or already down\n");
+ if (!disabled)
+ ice_service_task_restart(pf);
+ return 0;
+ }
+
+ ice_setup_mc_magic_wake(pf);
+
+ ice_prepare_for_shutdown(pf);
+
+ ice_set_wake(pf);
+
+ /* Free vectors, clear the interrupt scheme and release IRQs
+ * for proper hibernation, especially with large number of CPUs.
+ * Otherwise hibernation might fail when mapping all the vectors back
+ * to CPU0.
+ */
+ ice_free_irq_msix_misc(pf);
+ ice_for_each_vsi(pf, v) {
+ if (!pf->vsi[v])
+ continue;
+ ice_vsi_free_q_vectors(pf->vsi[v]);
+ }
+ ice_clear_interrupt_scheme(pf);
+
+ pci_save_state(pdev);
+ pci_wake_from_d3(pdev, pf->wol_ena);
+ pci_set_power_state(pdev, PCI_D3hot);
+ return 0;
+}
+
+/**
+ * ice_resume - PM callback for waking up from D3
+ * @dev: generic device information structure
+ */
+static int __maybe_unused ice_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ enum ice_reset_req reset_type;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ int ret;
+
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
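+ /* pci_restore_state() consumes the saved state, so save it again
+ * for the next suspend cycle
+ */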
+ pci_save_state(pdev);
+
+ if (!pci_device_is_present(pdev))
+ return -ENODEV;
+
+ ret = pci_enable_device_mem(pdev);
+ if (ret) {
+ dev_err(dev, "Cannot enable device after suspend\n");
+ return ret;
+ }
+
+ pf = pci_get_drvdata(pdev);
+ hw = &pf->hw;
+
+ pf->wakeup_reason = rd32(hw, PFPM_WUS);
+ ice_print_wake_reason(pf);
+
+ /* We cleared the interrupt scheme when we suspended, so we need to
+ * restore it now to resume device functionality.
+ */
+ ret = ice_reinit_interrupt_scheme(pf);
+ if (ret)
+ dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);
+
+ clear_bit(ICE_DOWN, pf->state);
+ /* Now perform PF reset and rebuild */
+ reset_type = ICE_RESET_PFR;
+ /* re-enable service task for reset, but allow reset to schedule it */
+ clear_bit(ICE_SERVICE_DIS, pf->state);
+
+ if (ice_schedule_reset(pf, reset_type))
+ dev_err(dev, "Reset during resume failed.\n");
+
+ clear_bit(ICE_SUSPENDED, pf->state);
+ ice_service_task_restart(pf);
+
+ /* Restart the service task */
+ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+
+ return 0;
+}
+#endif /* CONFIG_PM */
+
+/**
+ * ice_pci_err_detected - warning that PCI error has been detected
+ * @pdev: PCI device information struct
+ * @err: the type of PCI error
+ *
+ * Called to warn that something happened on the PCI bus and the error handling
+ * is in progress. Allows the driver to gracefully prepare/handle PCI errors.
+ */
+static pci_ers_result_t
+ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
+{
+ struct ice_pf *pf = pci_get_drvdata(pdev);
+
+ if (!pf) {
+ dev_err(&pdev->dev, "%s: unrecoverable device error %d\n",
+ __func__, err);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ if (!test_bit(ICE_SUSPENDED, pf->state)) {
+ ice_service_task_stop(pf);
+
+ if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
+ set_bit(ICE_PFR_REQ, pf->state);
+ ice_prepare_for_reset(pf, ICE_RESET_PFR);
+ }
+ }
+
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * ice_pci_err_slot_reset - a PCI slot reset has just happened
+ * @pdev: PCI device information struct
+ *
+ * Called to determine if the driver can recover from the PCI slot reset by
+ * using a register read to check whether the device is recoverable.
+ */
+static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
+{
+ struct ice_pf *pf = pci_get_drvdata(pdev);
+ pci_ers_result_t result;
+ int err;
+ u32 reg;
+
+ err = pci_enable_device_mem(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n",
+ err);
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else {
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+ pci_wake_from_d3(pdev, false);
+
+ /* Check for life: a zero GLGEN_RTRIG read means no global reset
+ * is pending and the read itself succeeded (a dead device reads
+ * back all ones)
+ */
+ reg = rd32(&pf->hw, GLGEN_RTRIG);
+ if (!reg)
+ result = PCI_ERS_RESULT_RECOVERED;
+ else
+ result = PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ return result;
+}
+
+/**
+ * ice_pci_err_resume - restart operations after PCI error recovery
+ * @pdev: PCI device information struct
+ *
+ * Called to allow the driver to bring things back up after PCI error and/or
+ * reset recovery have finished
+ */
+static void ice_pci_err_resume(struct pci_dev *pdev)
+{
+ struct ice_pf *pf = pci_get_drvdata(pdev);
+
+ if (!pf) {
+ dev_err(&pdev->dev, "%s failed, device is unrecoverable\n",
+ __func__);
+ return;
+ }
+
+ if (test_bit(ICE_SUSPENDED, pf->state)) {
+ dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n",
+ __func__);
+ return;
+ }
+
+ ice_restore_all_vfs_msi_state(pdev);
+
+ ice_do_reset(pf, ICE_RESET_PFR);
+ ice_service_task_restart(pf);
+ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+}
+
+/**
+ * ice_pci_err_reset_prepare - prepare device driver for PCI reset
+ * @pdev: PCI device information struct
+ */
+static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
+{
+ struct ice_pf *pf = pci_get_drvdata(pdev);
+
+ if (!test_bit(ICE_SUSPENDED, pf->state)) {
+ ice_service_task_stop(pf);
+
+ if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
+ set_bit(ICE_PFR_REQ, pf->state);
+ ice_prepare_for_reset(pf, ICE_RESET_PFR);
+ }
+ }
+}
+
+/**
+ * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
+ * @pdev: PCI device information struct
+ */
+static void ice_pci_err_reset_done(struct pci_dev *pdev)
+{
+ ice_pci_err_resume(pdev);
+}
+
+/* ice_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ * Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id ice_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT), 0 },
+ /* required last entry */
+ { 0, }
+};
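+/* Export the ID table so userspace (modprobe/udev) can autoload the module
+ * when a matching device appears.
+ */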
+MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
+
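+/* SIMPLE_DEV_PM_OPS() binds ice_suspend/ice_resume to all of the system
+ * sleep hooks (suspend/resume, freeze/thaw, poweroff/restore).
+ */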
+static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume);
+
+static const struct pci_error_handlers ice_pci_err_handler = {
+ .error_detected = ice_pci_err_detected,
+ .slot_reset = ice_pci_err_slot_reset,
+ .reset_prepare = ice_pci_err_reset_prepare,
+ .reset_done = ice_pci_err_reset_done,
+ .resume = ice_pci_err_resume
+};
+
+static struct pci_driver ice_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = ice_pci_tbl,
+ .probe = ice_probe,
+ .remove = ice_remove,
+#ifdef CONFIG_PM
+ .driver.pm = &ice_pm_ops,
+#endif /* CONFIG_PM */
+ .shutdown = ice_shutdown,
+ .sriov_configure = ice_sriov_configure,
+ .err_handler = &ice_pci_err_handler
+};
+
+/**
+ * ice_module_init - Driver registration routine
+ *
+ * ice_module_init is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ */
+static int __init ice_module_init(void)
+{
+ int status;
+
+ pr_info("%s\n", ice_driver_string);
+ pr_info("%s\n", ice_copyright);
+
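+ /* the "%s" format plus the KBUILD_MODNAME argument names the
+ * workqueue after the module
+ */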
+ ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME);
+ if (!ice_wq) {
+ pr_err("Failed to create workqueue\n");
+ return -ENOMEM;
+ }
+
+ status = pci_register_driver(&ice_driver);
+ if (status) {
+ pr_err("failed to register PCI driver, err %d\n", status);
+ destroy_workqueue(ice_wq);
+ }
+
+ return status;
+}
+module_init(ice_module_init);
+
+/**
+ * ice_module_exit - Driver exit cleanup routine
+ *
+ * ice_module_exit is called just before the driver is removed
+ * from memory.
+ */
+static void __exit ice_module_exit(void)
+{
+ pci_unregister_driver(&ice_driver);
+ destroy_workqueue(ice_wq);
+ pr_info("module unloaded\n");
+}
+module_exit(ice_module_exit);
+
+/**
+ * ice_set_mac_address - NDO callback to set MAC address
+ * @netdev: network interface device structure
+ * @pi: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_set_mac_address(struct net_device *netdev, void *pi)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ struct sockaddr *addr = pi;
+ u8 old_mac[ETH_ALEN];
+ u8 flags = 0;
+ u8 *mac;
+ int err;
+
+ mac = (u8 *)addr->sa_data;
+
+ if (!is_valid_ether_addr(mac))
+ return -EADDRNOTAVAIL;
+
+ if (ether_addr_equal(netdev->dev_addr, mac)) {
+ netdev_dbg(netdev, "already using mac %pM\n", mac);
+ return 0;
+ }
+
+ if (test_bit(ICE_DOWN, pf->state) ||
+ ice_is_reset_in_progress(pf->state)) {
+ netdev_err(netdev, "can't set mac %pM. device not ready\n",
+ mac);
+ return -EBUSY;
+ }
+
+ if (ice_chnl_dmac_fltr_cnt(pf)) {
+ netdev_err(netdev, "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n",
+ mac);
+ return -EAGAIN;
+ }
+
+ netif_addr_lock_bh(netdev);
+ ether_addr_copy(old_mac, netdev->dev_addr);
+ /* change the netdev's MAC address */
+ eth_hw_addr_set(netdev, mac);
+ netif_addr_unlock_bh(netdev);
+
+ /* Clean up old MAC filter. Not an error if old filter doesn't exist */
+ err = ice_fltr_remove_mac(vsi, old_mac, ICE_FWD_TO_VSI);
+ if (err && err != -ENOENT) {
+ err = -EADDRNOTAVAIL;
+ goto err_update_filters;
+ }
+
+ /* Add filter for new MAC. If filter exists, return success */
+ err = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
+ if (err == -EEXIST) {
+ /* Although this MAC filter is already present in hardware it's
+ * possible in some cases (e.g. bonding) that dev_addr was
+ * modified outside of the driver and needs to be restored back
+ * to this value.
+ */
+ netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);
+
+ return 0;
+ } else if (err) {
+ /* error if the new filter addition failed */
+ err = -EADDRNOTAVAIL;
+ }
+
+err_update_filters:
+ if (err) {
+ netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
+ mac);
+ netif_addr_lock_bh(netdev);
+ eth_hw_addr_set(netdev, old_mac);
+ netif_addr_unlock_bh(netdev);
+ return err;
+ }
+
+ netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
+ netdev->dev_addr);
+
+ /* write new MAC address to the firmware */
+ flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
+ err = ice_aq_manage_mac_write(hw, mac, flags, NULL);
+ if (err) {
+ netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %d\n",
+ mac, err);
+ }
+ return 0;
+}
+
+/**
+ * ice_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ */
+static void ice_set_rx_mode(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (!vsi)
+ return;
+
+ /* Set the flags to synchronize filters;
+ * ndo_set_rx_mode may be triggered even without a change in netdev
+ * flags
+ */
+ set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
+ set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
+ set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
+
+ /* schedule our worker thread which will take care of
+ * applying the new filter changes
+ */
+ ice_service_task_schedule(vsi->back);
+}
+
+/**
+ * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
+ * @netdev: network interface device structure
+ * @queue_index: Queue ID
+ * @maxrate: maximum bandwidth in Mbps
+ */
+static int
+ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ u16 q_handle;
+ int status;
+ u8 tc;
+
+ /* Validate that the requested maxrate (in Mbps) is within the
+ * permitted range; ICE_SCHED_MAX_BW is in Kbps, hence the divide
+ * by 1000
+ */
+ if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) {
+ netdev_err(netdev, "Invalid max rate %d specified for the queue %d\n",
+ maxrate, queue_index);
+ return -EINVAL;
+ }
+
+ q_handle = vsi->tx_rings[queue_index]->q_handle;
+ tc = ice_dcb_get_tc(vsi, queue_index);
+
+ /* Set BW back to default, when user set maxrate to 0 */
+ if (!maxrate)
+ status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
+ q_handle, ICE_MAX_BW);
+ else
+ status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
+ q_handle, ICE_MAX_BW, maxrate * 1000);
+ if (status)
+ netdev_err(netdev, "Unable to set Tx max rate, error %d\n",
+ status);
+
+ return status;
+}
+
+/**
+ * ice_fdb_add - add an entry to the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being added
+ * @vid: VLAN ID
+ * @flags: instructions from stack about fdb operation
+ * @extack: netlink extended ack
+ */
+static int
+ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
+ struct net_device *dev, const unsigned char *addr, u16 vid,
+ u16 flags, struct netlink_ext_ack __always_unused *extack)
+{
+ int err;
+
+ if (vid) {
+ netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n");
+ return -EINVAL;
+ }
+ if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+ netdev_err(dev, "FDB only supports static addresses\n");
+ return -EINVAL;
+ }
+
+ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
+ err = dev_uc_add_excl(dev, addr);
+ else if (is_multicast_ether_addr(addr))
+ err = dev_mc_add_excl(dev, addr);
+ else
+ err = -EINVAL;
+
+ /* Only return duplicate errors if NLM_F_EXCL is set */
+ if (err == -EEXIST && !(flags & NLM_F_EXCL))
+ err = 0;
+
+ return err;
+}
+
+/**
+ * ice_fdb_del - delete an entry from the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being removed
+ * @vid: VLAN ID
+ * @extack: netlink extended ack
+ */
+static int
+ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
+ struct net_device *dev, const unsigned char *addr,
+ __always_unused u16 vid, struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (ndm->ndm_state & NUD_PERMANENT) {
+ netdev_err(dev, "FDB only supports static addresses\n");
+ return -EINVAL;
+ }
+
+ if (is_unicast_ether_addr(addr))
+ err = dev_uc_del(dev, addr);
+ else if (is_multicast_ether_addr(addr))
+ err = dev_mc_del(dev, addr);
+ else
+ err = -EINVAL;
+
+ return err;
+}
+
+#define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \
+ NETIF_F_HW_VLAN_CTAG_TX | \
+ NETIF_F_HW_VLAN_STAG_RX | \
+ NETIF_F_HW_VLAN_STAG_TX)
+
+#define NETIF_VLAN_STRIPPING_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \
+ NETIF_F_HW_VLAN_STAG_RX)
+
+#define NETIF_VLAN_FILTERING_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \
+ NETIF_F_HW_VLAN_STAG_FILTER)
+
+/**
+ * ice_fix_features - fix the netdev features flags based on device limitations
+ * @netdev: ptr to the netdev that flags are being fixed on
+ * @features: features that need to be checked and possibly fixed
+ *
+ * Make sure any fixups are made to features in this callback. This enables the
+ * driver to avoid checking for unsupported configurations throughout the rest
+ * of the driver, because that is the responsibility of this callback.
+ *
+ * Single VLAN Mode (SVM) Supported Features:
+ * NETIF_F_HW_VLAN_CTAG_FILTER
+ * NETIF_F_HW_VLAN_CTAG_RX
+ * NETIF_F_HW_VLAN_CTAG_TX
+ *
+ * Double VLAN Mode (DVM) Supported Features:
+ * NETIF_F_HW_VLAN_CTAG_FILTER
+ * NETIF_F_HW_VLAN_CTAG_RX
+ * NETIF_F_HW_VLAN_CTAG_TX
+ *
+ * NETIF_F_HW_VLAN_STAG_FILTER
+ * NETIF_F_HW_VLAN_STAG_RX
+ * NETIF_F_HW_VLAN_STAG_TX
+ *
+ * Features that need fixing:
+ * Cannot simultaneously enable CTAG and STAG stripping and/or insertion.
+ * These are mutually exclusive as the VSI context cannot support multiple
+ * VLAN ethertypes simultaneously for stripping and/or insertion. If this
+ * is not done, then default to clearing the requested STAG offload
+ * settings.
+ *
+ * All supported filtering has to be enabled or disabled together. For
+ * example, in DVM, CTAG and STAG filtering have to be enabled and disabled
+ * together. If this is not done, then default to VLAN filtering disabled.
+ * These are mutually exclusive as there is currently no way to
+ * enable/disable VLAN filtering based on VLAN ethertype when using VLAN
+ * prune rules.
+ */
+static netdev_features_t
+ice_fix_features(struct net_device *netdev, netdev_features_t features)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ netdev_features_t req_vlan_fltr, cur_vlan_fltr;
+ bool cur_ctag, cur_stag, req_ctag, req_stag;
+
+ cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;
+ cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
+ cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
+
+ req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;
+ req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
+ req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
+
+ if (req_vlan_fltr != cur_vlan_fltr) {
+ if (ice_is_dvm_ena(&np->vsi->back->hw)) {
+ if (req_ctag && req_stag) {
+ features |= NETIF_VLAN_FILTERING_FEATURES;
+ } else if (!req_ctag && !req_stag) {
+ features &= ~NETIF_VLAN_FILTERING_FEATURES;
+ } else if ((!cur_ctag && req_ctag && !cur_stag) ||
+ (!cur_stag && req_stag && !cur_ctag)) {
+ features |= NETIF_VLAN_FILTERING_FEATURES;
+ netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");
+ } else if ((cur_ctag && !req_ctag && cur_stag) ||
+ (cur_stag && !req_stag && cur_ctag)) {
+ features &= ~NETIF_VLAN_FILTERING_FEATURES;
+ netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");
+ }
+ } else {
+ if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)
+ netdev_warn(netdev, "cannot support requested 802.1ad filtering setting in SVM mode\n");
+
+ if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)
+ features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ }
+ }
+
+ if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) &&
+ (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))) {
+ netdev_warn(netdev, "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n");
+ features &= ~(NETIF_F_HW_VLAN_STAG_RX |
+ NETIF_F_HW_VLAN_STAG_TX);
+ }
+
+ if (!(netdev->features & NETIF_F_RXFCS) &&
+ (features & NETIF_F_RXFCS) &&
+ (features & NETIF_VLAN_STRIPPING_FEATURES) &&
+ !ice_vsi_has_non_zero_vlans(np->vsi)) {
+ netdev_warn(netdev, "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n");
+ features &= ~NETIF_VLAN_STRIPPING_FEATURES;
+ }
+
+ return features;
+}
+
+/**
+ * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI
+ * @vsi: PF's VSI
+ * @features: features used to determine VLAN offload settings
+ *
+ * First, determine the vlan_ethertype based on the VLAN offload bits in
+ * features. Then determine if stripping and insertion should be enabled or
+ * disabled. Finally enable or disable VLAN stripping and insertion.
+ */
+static int
+ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features)
+{
+ bool enable_stripping = true, enable_insertion = true;
+ struct ice_vsi_vlan_ops *vlan_ops;
+ int strip_err = 0, insert_err = 0;
+ u16 vlan_ethertype = 0;
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+ if (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))
+ vlan_ethertype = ETH_P_8021AD;
+ else if (features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX))
+ vlan_ethertype = ETH_P_8021Q;
+
+ if (!(features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_CTAG_RX)))
+ enable_stripping = false;
+ if (!(features & (NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_CTAG_TX)))
+ enable_insertion = false;
+
+ if (enable_stripping)
+ strip_err = vlan_ops->ena_stripping(vsi, vlan_ethertype);
+ else
+ strip_err = vlan_ops->dis_stripping(vsi);
+
+ if (enable_insertion)
+ insert_err = vlan_ops->ena_insertion(vsi, vlan_ethertype);
+ else
+ insert_err = vlan_ops->dis_insertion(vsi);
+
+ if (strip_err || insert_err)
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * ice_set_vlan_filtering_features - set VLAN filtering features for the PF VSI
+ * @vsi: PF's VSI
+ * @features: features used to determine VLAN filtering settings
+ *
+ * Enable or disable Rx VLAN filtering based on the VLAN filtering bits in the
+ * features.
+ */
+static int
+ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features)
+{
+ struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ int err = 0;
+
+ /* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking
+ * if either bit is set
+ */
+ if (features &
+ (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER))
+ err = vlan_ops->ena_rx_filtering(vsi);
+ else
+ err = vlan_ops->dis_rx_filtering(vsi);
+
+ return err;
+}
+
+/**
+ * ice_set_vlan_features - set VLAN settings based on suggested feature set
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ *
+ * Only update VLAN settings if the requested_vlan_features are different from
+ * the current_vlan_features.
+ */
+static int
+ice_set_vlan_features(struct net_device *netdev, netdev_features_t features)
+{
+ netdev_features_t current_vlan_features, requested_vlan_features;
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ int err;
+
+ current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES;
+ requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES;
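+ /* XOR is non-zero only when the current and requested offload
+ * bits differ
+ */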
+ if (current_vlan_features ^ requested_vlan_features) {
+ if ((features & NETIF_F_RXFCS) &&
+ (features & NETIF_VLAN_STRIPPING_FEATURES)) {
+ dev_err(ice_pf_to_dev(vsi->back),
+ "To enable VLAN stripping, you must first enable FCS/CRC stripping\n");
+ return -EIO;
+ }
+
+ err = ice_set_vlan_offload_features(vsi, features);
+ if (err)
+ return err;
+ }
+
+ current_vlan_features = netdev->features &
+ NETIF_VLAN_FILTERING_FEATURES;
+ requested_vlan_features = features & NETIF_VLAN_FILTERING_FEATURES;
+ if (current_vlan_features ^ requested_vlan_features) {
+ err = ice_set_vlan_filtering_features(vsi, features);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_set_loopback - turn on/off loopback mode on underlying PF
+ * @vsi: ptr to VSI
+ * @ena: flag to indicate the on/off setting
+ */
+static int ice_set_loopback(struct ice_vsi *vsi, bool ena)
+{
+ bool if_running = netif_running(vsi->netdev);
+ int ret;
+
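+ /* test_and_set_bit() returns the previous ICE_VSI_DOWN value, so
+ * ice_down() only runs when the VSI was actually up
+ */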
+ if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
+ ret = ice_down(vsi);
+ if (ret) {
+ netdev_err(vsi->netdev, "Preparing device to toggle loopback failed\n");
+ return ret;
+ }
+ }
+ ret = ice_aq_set_mac_loopback(&vsi->back->hw, ena, NULL);
+ if (ret)
+ netdev_err(vsi->netdev, "Failed to toggle loopback state\n");
+ if (if_running)
+ ret = ice_up(vsi);
+
+ return ret;
+}
+
+/**
+ * ice_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ */
+static int
+ice_set_features(struct net_device *netdev, netdev_features_t features)
+{
+ netdev_features_t changed = netdev->features ^ features;
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ int ret = 0;
+
+ /* Don't set any netdev advanced features with device in Safe Mode */
+ if (ice_is_safe_mode(pf)) {
+ dev_err(ice_pf_to_dev(pf),
+ "Device is in Safe Mode - not enabling advanced netdev features\n");
+ return ret;
+ }
+
+ /* Do not change setting during reset */
+ if (ice_is_reset_in_progress(pf->state)) {
+ dev_err(ice_pf_to_dev(pf),
+ "Device is resetting, changing advanced netdev features temporarily unavailable.\n");
+ return -EBUSY;
+ }
+
+ /* Multiple features can be changed in one call so keep features in
+ * separate if/else statements to guarantee each feature is checked
+ */
+ if (changed & NETIF_F_RXHASH)
+ ice_vsi_manage_rss_lut(vsi, !!(features & NETIF_F_RXHASH));
+
+ ret = ice_set_vlan_features(netdev, features);
+ if (ret)
+ return ret;
+
+ /* Turn on receive of FCS aka CRC, and after setting this
+ * flag the packet data will have the 4 byte CRC appended
+ */
+ if (changed & NETIF_F_RXFCS) {
+ if ((features & NETIF_F_RXFCS) &&
+ (features & NETIF_VLAN_STRIPPING_FEATURES)) {
+ dev_err(ice_pf_to_dev(vsi->back),
+ "To disable FCS/CRC stripping, you must first disable VLAN stripping\n");
+ return -EIO;
+ }
+
+ ice_vsi_cfg_crc_strip(vsi, !!(features & NETIF_F_RXFCS));
+ ret = ice_down_up(vsi);
+ if (ret)
+ return ret;
+ }
+
+ if (changed & NETIF_F_NTUPLE) {
+ bool ena = !!(features & NETIF_F_NTUPLE);
+
+ ice_vsi_manage_fdir(vsi, ena);
+ ena ? ice_init_arfs(vsi) : ice_clear_arfs(vsi);
+ }
+
+ /* don't turn off hw_tc_offload when ADQ is already enabled */
+ if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) {
+ dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n");
+ return -EACCES;
+ }
+
+ if (changed & NETIF_F_HW_TC) {
+ bool ena = !!(features & NETIF_F_HW_TC);
+
+ ena ? set_bit(ICE_FLAG_CLS_FLOWER, pf->flags) :
+ clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
+ }
+
+ if (changed & NETIF_F_LOOPBACK)
+ ret = ice_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK));
+
+ return ret;
+}
+
+/**
+ * ice_vsi_vlan_setup - Setup VLAN offload properties on a PF VSI
+ * @vsi: VSI to setup VLAN properties for
+ */
+static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
+{
+ int err;
+
+ err = ice_set_vlan_offload_features(vsi, vsi->netdev->features);
+ if (err)
+ return err;
+
+ err = ice_set_vlan_filtering_features(vsi, vsi->netdev->features);
+ if (err)
+ return err;
+
+ return ice_vsi_add_vlan_zero(vsi);
+}
+
+/**
+ * ice_vsi_cfg - Setup the VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_vsi_cfg(struct ice_vsi *vsi)
+{
+ int err;
+
+ if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+ ice_set_rx_mode(vsi->netdev);
+
+ err = ice_vsi_vlan_setup(vsi);
+ if (err)
+ return err;
+ }
+ ice_vsi_cfg_dcb_rings(vsi);
+
+ err = ice_vsi_cfg_lan_txqs(vsi);
+ if (!err && ice_is_xdp_ena_vsi(vsi))
+ err = ice_vsi_cfg_xdp_txqs(vsi);
+ if (!err)
+ err = ice_vsi_cfg_rxqs(vsi);
+
+ return err;
+}
+
+/* THEORY OF MODERATION:
+ * The ice driver hardware works differently from the hardware that DIMLIB was
+ * originally made for. ice hardware doesn't have packet count limits that
+ * can trigger an interrupt, but it *does* have interrupt rate limit support,
+ * which is hard-coded to a limit of 250,000 ints/second.
+ * If not using dynamic moderation, the INTRL value can be modified
+ * by ethtool rx-usecs-high.
+ */
+struct ice_dim {
+ /* the throttle rate for interrupts, basically worst case delay before
+ * an initial interrupt fires, value is stored in microseconds.
+ */
+ u16 itr;
+};
+
+/* Make a different profile for Rx that doesn't allow quite so aggressive
+ * moderation at the high end (it maxes out at 126us or about 8k interrupts a
+ * second).
+ */
+static const struct ice_dim rx_profile[] = {
+ {2}, /* 500,000 ints/s, capped at 250K by INTRL */
+ {8}, /* 125,000 ints/s */
+ {16}, /* 62,500 ints/s */
+ {62}, /* 16,129 ints/s */
+ {126} /* 7,936 ints/s */
+};
+
+/* The transmit profile, which has the same sorts of values
+ * as the previous struct
+ */
+static const struct ice_dim tx_profile[] = {
+ {2}, /* 500,000 ints/s, capped at 250K by INTRL */
+ {8}, /* 125,000 ints/s */
+ {40}, /* 25,000 ints/s */
+ {128}, /* 7,812 ints/s */
+ {256} /* 3,906 ints/s */
+};
+
+static void ice_tx_dim_work(struct work_struct *work)
+{
+ struct ice_ring_container *rc;
+ struct dim *dim;
+ u16 itr;
+
+ dim = container_of(work, struct dim, work);
+ rc = (struct ice_ring_container *)dim->priv;
+
+ WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));
+
+ /* look up the values in our local table */
+ itr = tx_profile[dim->profile_ix].itr;
+
+ ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim);
+ ice_write_itr(rc, itr);
+
+ dim->state = DIM_START_MEASURE;
+}
+
+static void ice_rx_dim_work(struct work_struct *work)
+{
+ struct ice_ring_container *rc;
+ struct dim *dim;
+ u16 itr;
+
+ dim = container_of(work, struct dim, work);
+ rc = (struct ice_ring_container *)dim->priv;
+
+ WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));
+
+ /* look up the values in our local table */
+ itr = rx_profile[dim->profile_ix].itr;
+
+ ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim);
+ ice_write_itr(rc, itr);
+
+ dim->state = DIM_START_MEASURE;
+}
+
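+/* Profile index 1 selects the 8 us entry in both tables above, i.e. roughly
+ * 125,000 ints/s.
+ */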
+#define ICE_DIM_DEFAULT_PROFILE_IX 1
+
+/**
+ * ice_init_moderation - set up interrupt moderation
+ * @q_vector: the vector containing rings to be configured
+ *
+ * Set up interrupt moderation registers, with the intent to do the right thing
+ * when called from reset or from probe, regardless of whether dynamic
+ * moderation is enabled. Take special care to write all the registers in
+ * both the dynamic and non-dynamic cases in order to make sure hardware is
+ * in a known state.
+ */
+static void ice_init_moderation(struct ice_q_vector *q_vector)
+{
+ struct ice_ring_container *rc;
+ bool tx_dynamic, rx_dynamic;
+
+ rc = &q_vector->tx;
+ INIT_WORK(&rc->dim.work, ice_tx_dim_work);
+ rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
+ rc->dim.priv = rc;
+ tx_dynamic = ITR_IS_DYNAMIC(rc);
+
+ /* set the initial TX ITR to match the above */
+ ice_write_itr(rc, tx_dynamic ?
+ tx_profile[rc->dim.profile_ix].itr : rc->itr_setting);
+
+ rc = &q_vector->rx;
+ INIT_WORK(&rc->dim.work, ice_rx_dim_work);
+ rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
+ rc->dim.priv = rc;
+ rx_dynamic = ITR_IS_DYNAMIC(rc);
+
+ /* set the initial RX ITR to match the above */
+ ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr :
+ rc->itr_setting);
+
+ ice_set_q_vector_intrl(q_vector);
+}
+
+/**
+ * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
+ * @vsi: the VSI being configured
+ */
+static void ice_napi_enable_all(struct ice_vsi *vsi)
+{
+ int q_idx;
+
+ if (!vsi->netdev)
+ return;
+
+ ice_for_each_q_vector(vsi, q_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ ice_init_moderation(q_vector);
+
+ if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
+ napi_enable(&q_vector->napi);
+ }
+}
+
+/**
+ * ice_up_complete - Finish the last steps of bringing up a connection
+ * @vsi: The VSI being configured
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_up_complete(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+ ice_vsi_cfg_msix(vsi);
+
+ /* Enable only Rx rings; Tx rings were enabled by the FW when the
+ * Tx queue group list was configured and the context bits were
+ * programmed using ice_vsi_cfg_txqs
+ */
+ err = ice_vsi_start_all_rx_rings(vsi);
+ if (err)
+ return err;
+
+ clear_bit(ICE_VSI_DOWN, vsi->state);
+ ice_napi_enable_all(vsi);
+ ice_vsi_ena_irq(vsi);
+
+ if (vsi->port_info &&
+ (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
+ vsi->netdev && vsi->type == ICE_VSI_PF) {
+ ice_print_link_msg(vsi, true);
+ netif_tx_start_all_queues(vsi->netdev);
+ netif_carrier_on(vsi->netdev);
+ if (!ice_is_e810(&pf->hw))
+ ice_ptp_link_change(pf, pf->hw.pf_id, true);
+ }
+
+ /* Perform an initial read of the statistics registers now to
+ * set the baseline so counters are ready when interface is up
+ */
+ ice_update_eth_stats(vsi);
+
+ if (vsi->type == ICE_VSI_PF)
+ ice_service_task_schedule(pf);
+
+ return 0;
+}
+
+/**
+ * ice_up - Bring the connection back up after being down
+ * @vsi: VSI being configured
+ */
+int ice_up(struct ice_vsi *vsi)
+{
+ int err;
+
+ err = ice_vsi_cfg(vsi);
+ if (!err)
+ err = ice_up_complete(vsi);
+
+ return err;
+}
+
+/**
+ * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
+ * @syncp: pointer to u64_stats_sync
+ * @stats: stats that pkts and bytes count will be taken from
+ * @pkts: packets stats counter
+ * @bytes: bytes stats counter
+ *
+ * This function fetches stats from the ring, taking care of the atomic
+ * operations needed to read u64 values on a 32-bit machine.
+ */
+void
+ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp,
+ struct ice_q_stats stats, u64 *pkts, u64 *bytes)
+{
+ unsigned int start;
+
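+ /* u64_stats_fetch_retry_irq() forces a re-read if a writer updated
+ * the counters mid-fetch; this only ever loops on 32-bit machines,
+ * where 64-bit reads are not atomic
+ */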
+ do {
+ start = u64_stats_fetch_begin_irq(syncp);
+ *pkts = stats.pkts;
+ *bytes = stats.bytes;
+ } while (u64_stats_fetch_retry_irq(syncp, start));
+}
+
+/**
+ * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters
+ * @vsi: the VSI to be updated
+ * @vsi_stats: the stats struct to be updated
+ * @rings: rings to work on
+ * @count: number of rings
+ */
+static void
+ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
+ struct rtnl_link_stats64 *vsi_stats,
+ struct ice_tx_ring **rings, u16 count)
+{
+ u16 i;
+
+ for (i = 0; i < count; i++) {
+ struct ice_tx_ring *ring;
+ u64 pkts = 0, bytes = 0;
+
+ ring = READ_ONCE(rings[i]);
+ if (!ring)
+ continue;
+ ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes);
+ vsi_stats->tx_packets += pkts;
+ vsi_stats->tx_bytes += bytes;
+ vsi->tx_restart += ring->tx_stats.restart_q;
+ vsi->tx_busy += ring->tx_stats.tx_busy;
+ vsi->tx_linearize += ring->tx_stats.tx_linearize;
+ }
+}
+
+/**
+ * ice_update_vsi_ring_stats - Update VSI stats counters
+ * @vsi: the VSI to be updated
+ */
+static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
+{
+ struct rtnl_link_stats64 *vsi_stats;
+ u64 pkts, bytes;
+ int i;
+
+ vsi_stats = kzalloc(sizeof(*vsi_stats), GFP_ATOMIC);
+ if (!vsi_stats)
+ return;
+
+ /* reset non-netdev (extended) stats */
+ vsi->tx_restart = 0;
+ vsi->tx_busy = 0;
+ vsi->tx_linearize = 0;
+ vsi->rx_buf_failed = 0;
+ vsi->rx_page_failed = 0;
+
+ rcu_read_lock();
+
+ /* update Tx rings counters */
+ ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->tx_rings,
+ vsi->num_txq);
+
+ /* update Rx rings counters */
+ ice_for_each_rxq(vsi, i) {
+ struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]);
+
+ ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes);
+ vsi_stats->rx_packets += pkts;
+ vsi_stats->rx_bytes += bytes;
+ vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed;
+ vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
+ }
+
+ /* update XDP Tx rings counters */
+ if (ice_is_xdp_ena_vsi(vsi))
+ ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->xdp_rings,
+ vsi->num_xdp_txq);
+
+ rcu_read_unlock();
+
+ vsi->net_stats.tx_packets = vsi_stats->tx_packets;
+ vsi->net_stats.tx_bytes = vsi_stats->tx_bytes;
+ vsi->net_stats.rx_packets = vsi_stats->rx_packets;
+ vsi->net_stats.rx_bytes = vsi_stats->rx_bytes;
+
+ kfree(vsi_stats);
+}
+
+/**
+ * ice_update_vsi_stats - Update VSI stats counters
+ * @vsi: the VSI to be updated
+ */
+void ice_update_vsi_stats(struct ice_vsi *vsi)
+{
+ struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
+ struct ice_eth_stats *cur_es = &vsi->eth_stats;
+ struct ice_pf *pf = vsi->back;
+
+ if (test_bit(ICE_VSI_DOWN, vsi->state) ||
+ test_bit(ICE_CFG_BUSY, pf->state))
+ return;
+
+ /* get stats as recorded by Tx/Rx rings */
+ ice_update_vsi_ring_stats(vsi);
+
+ /* get VSI stats as recorded by the hardware */
+ ice_update_eth_stats(vsi);
+
+ cur_ns->tx_errors = cur_es->tx_errors;
+ cur_ns->rx_dropped = cur_es->rx_discards;
+ cur_ns->tx_dropped = cur_es->tx_discards;
+ cur_ns->multicast = cur_es->rx_multicast;
+
+ /* update some more netdev stats if this is main VSI */
+ if (vsi->type == ICE_VSI_PF) {
+ cur_ns->rx_crc_errors = pf->stats.crc_errors;
+ cur_ns->rx_errors = pf->stats.crc_errors +
+ pf->stats.illegal_bytes +
+ pf->stats.rx_len_errors +
+ pf->stats.rx_undersize +
+ pf->hw_csum_rx_error +
+ pf->stats.rx_jabber +
+ pf->stats.rx_fragments +
+ pf->stats.rx_oversize;
+ cur_ns->rx_length_errors = pf->stats.rx_len_errors;
+ /* record drops from the port level */
+ cur_ns->rx_missed_errors = pf->stats.eth.rx_discards;
+ }
+}
+
+/**
+ * ice_update_pf_stats - Update PF port stats counters
+ * @pf: PF whose stats need to be updated
+ */
+void ice_update_pf_stats(struct ice_pf *pf)
+{
+ struct ice_hw_port_stats *prev_ps, *cur_ps;
+ struct ice_hw *hw = &pf->hw;
+ u16 fd_ctr_base;
+ u8 port;
+
+ port = hw->port_info->lport;
+ prev_ps = &pf->stats_prev;
+ cur_ps = &pf->stats;
+
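+ /* each ice_stat_update40/32() call below accumulates the delta since
+ * the previous read, using the prev_ps value to account for counter
+ * rollover
+ */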
+ ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded,
+ &prev_ps->eth.rx_bytes,
+ &cur_ps->eth.rx_bytes);
+
+ ice_stat_update40(hw, GLPRT_UPRCL(port), pf->stat_prev_loaded,
+ &prev_ps->eth.rx_unicast,
+ &cur_ps->eth.rx_unicast);
+
+ ice_stat_update40(hw, GLPRT_MPRCL(port), pf->stat_prev_loaded,
+ &prev_ps->eth.rx_multicast,
+ &cur_ps->eth.rx_multicast);
+
+ ice_stat_update40(hw, GLPRT_BPRCL(port), pf->stat_prev_loaded,
+ &prev_ps->eth.rx_broadcast,
+ &cur_ps->eth.rx_broadcast);
+
+ ice_stat_update32(hw, PRTRPB_RDPC, pf->stat_prev_loaded,
+ &prev_ps->eth.rx_discards,
+ &cur_ps->eth.rx_discards);
+
+ ice_stat_update40(hw, GLPRT_GOTCL(port), pf->stat_prev_loaded,
+ &prev_ps->eth.tx_bytes,
+ &cur_ps->eth.tx_bytes);
+
+ ice_stat_update40(hw, GLPRT_UPTCL(port), pf->stat_prev_loaded,
+ &prev_ps->eth.tx_unicast,
+ &cur_ps->eth.tx_unicast);
+
+ ice_stat_update40(hw, GLPRT_MPTCL(port), pf->stat_prev_loaded,
+ &prev_ps->eth.tx_multicast,
+ &cur_ps->eth.tx_multicast);
+
+ ice_stat_update40(hw, GLPRT_BPTCL(port), pf->stat_prev_loaded,
+ &prev_ps->eth.tx_broadcast,
+ &cur_ps->eth.tx_broadcast);
+
+ ice_stat_update32(hw, GLPRT_TDOLD(port), pf->stat_prev_loaded,
+ &prev_ps->tx_dropped_link_down,
+ &cur_ps->tx_dropped_link_down);
+
+ ice_stat_update40(hw, GLPRT_PRC64L(port), pf->stat_prev_loaded,
+ &prev_ps->rx_size_64, &cur_ps->rx_size_64);
+
+ ice_stat_update40(hw, GLPRT_PRC127L(port), pf->stat_prev_loaded,
+ &prev_ps->rx_size_127, &cur_ps->rx_size_127);
+
+ ice_stat_update40(hw, GLPRT_PRC255L(port), pf->stat_prev_loaded,
+ &prev_ps->rx_size_255, &cur_ps->rx_size_255);
+
+ ice_stat_update40(hw, GLPRT_PRC511L(port), pf->stat_prev_loaded,
+ &prev_ps->rx_size_511, &cur_ps->rx_size_511);
+
+ ice_stat_update40(hw, GLPRT_PRC1023L(port), pf->stat_prev_loaded,
+ &prev_ps->rx_size_1023, &cur_ps->rx_size_1023);
+
+ ice_stat_update40(hw, GLPRT_PRC1522L(port), pf->stat_prev_loaded,
+ &prev_ps->rx_size_1522, &cur_ps->rx_size_1522);
+
+ ice_stat_update40(hw, GLPRT_PRC9522L(port), pf->stat_prev_loaded,
+ &prev_ps->rx_size_big, &cur_ps->rx_size_big);
+
+ ice_stat_update40(hw, GLPRT_PTC64L(port), pf->stat_prev_loaded,
+ &prev_ps->tx_size_64, &cur_ps->tx_size_64);
+
+ ice_stat_update40(hw, GLPRT_PTC127L(port), pf->stat_prev_loaded,
+ &prev_ps->tx_size_127, &cur_ps->tx_size_127);
+
+ ice_stat_update40(hw, GLPRT_PTC255L(port), pf->stat_prev_loaded,
+ &prev_ps->tx_size_255, &cur_ps->tx_size_255);
+
+ ice_stat_update40(hw, GLPRT_PTC511L(port), pf->stat_prev_loaded,
+ &prev_ps->tx_size_511, &cur_ps->tx_size_511);
+
+ ice_stat_update40(hw, GLPRT_PTC1023L(port), pf->stat_prev_loaded,
+ &prev_ps->tx_size_1023, &cur_ps->tx_size_1023);
+
+ ice_stat_update40(hw, GLPRT_PTC1522L(port), pf->stat_prev_loaded,
+ &prev_ps->tx_size_1522, &cur_ps->tx_size_1522);
+
+ ice_stat_update40(hw, GLPRT_PTC9522L(port), pf->stat_prev_loaded,
+ &prev_ps->tx_size_big, &cur_ps->tx_size_big);
+
+ fd_ctr_base = hw->fd_ctr_base;
+
+ ice_stat_update40(hw,
+ GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base)),
+ pf->stat_prev_loaded, &prev_ps->fd_sb_match,
+ &cur_ps->fd_sb_match);
+ ice_stat_update32(hw, GLPRT_LXONRXC(port), pf->stat_prev_loaded,
+ &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);
+
+ ice_stat_update32(hw, GLPRT_LXOFFRXC(port), pf->stat_prev_loaded,
+ &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx);
+
+ ice_stat_update32(hw, GLPRT_LXONTXC(port), pf->stat_prev_loaded,
+ &prev_ps->link_xon_tx, &cur_ps->link_xon_tx);
+
+ ice_stat_update32(hw, GLPRT_LXOFFTXC(port), pf->stat_prev_loaded,
+ &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);
+
+ ice_update_dcb_stats(pf);
+
+ ice_stat_update32(hw, GLPRT_CRCERRS(port), pf->stat_prev_loaded,
+ &prev_ps->crc_errors, &cur_ps->crc_errors);
+
+ ice_stat_update32(hw, GLPRT_ILLERRC(port), pf->stat_prev_loaded,
+ &prev_ps->illegal_bytes, &cur_ps->illegal_bytes);
+
+ ice_stat_update32(hw, GLPRT_MLFC(port), pf->stat_prev_loaded,
+ &prev_ps->mac_local_faults,
+ &cur_ps->mac_local_faults);
+
+ ice_stat_update32(hw, GLPRT_MRFC(port), pf->stat_prev_loaded,
+ &prev_ps->mac_remote_faults,
+ &cur_ps->mac_remote_faults);
+
+ ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded,
+ &prev_ps->rx_len_errors, &cur_ps->rx_len_errors);
+
+ ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded,
+ &prev_ps->rx_undersize, &cur_ps->rx_undersize);
+
+ ice_stat_update32(hw, GLPRT_RFC(port), pf->stat_prev_loaded,
+ &prev_ps->rx_fragments, &cur_ps->rx_fragments);
+
+ ice_stat_update32(hw, GLPRT_ROC(port), pf->stat_prev_loaded,
+ &prev_ps->rx_oversize, &cur_ps->rx_oversize);
+
+ ice_stat_update32(hw, GLPRT_RJC(port), pf->stat_prev_loaded,
+ &prev_ps->rx_jabber, &cur_ps->rx_jabber);
+
+ cur_ps->fd_sb_status = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0;
+
+ pf->stat_prev_loaded = true;
+}
+
+/**
+ * ice_get_stats64 - get statistics for network device structure
+ * @netdev: network interface device structure
+ * @stats: main device statistics structure
+ */
+static
+void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct rtnl_link_stats64 *vsi_stats;
+ struct ice_vsi *vsi = np->vsi;
+
+ vsi_stats = &vsi->net_stats;
+
+ if (!vsi->num_txq || !vsi->num_rxq)
+ return;
+
+ /* netdev packet/byte stats come from per-ring counters, summed up by
+ * ice_update_vsi_ring_stats(). Only call the update routine and read
+ * the registers if the VSI is not down.
+ */
+ if (!test_bit(ICE_VSI_DOWN, vsi->state))
+ ice_update_vsi_ring_stats(vsi);
+ stats->tx_packets = vsi_stats->tx_packets;
+ stats->tx_bytes = vsi_stats->tx_bytes;
+ stats->rx_packets = vsi_stats->rx_packets;
+ stats->rx_bytes = vsi_stats->rx_bytes;
+
+ /* The rest of the stats can be read from the hardware but instead we
+ * just return values that the watchdog task has already obtained from
+ * the hardware.
+ */
+ stats->multicast = vsi_stats->multicast;
+ stats->tx_errors = vsi_stats->tx_errors;
+ stats->tx_dropped = vsi_stats->tx_dropped;
+ stats->rx_errors = vsi_stats->rx_errors;
+ stats->rx_dropped = vsi_stats->rx_dropped;
+ stats->rx_crc_errors = vsi_stats->rx_crc_errors;
+ stats->rx_length_errors = vsi_stats->rx_length_errors;
+}
+
+/**
+ * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
+ * @vsi: VSI having NAPI disabled
+ */
+static void ice_napi_disable_all(struct ice_vsi *vsi)
+{
+ int q_idx;
+
+ if (!vsi->netdev)
+ return;
+
+ ice_for_each_q_vector(vsi, q_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
+ napi_disable(&q_vector->napi);
+
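+ /* cancel any in-flight DIM work and wait for it to finish before
+ * the vectors go away
+ */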
+ cancel_work_sync(&q_vector->tx.dim.work);
+ cancel_work_sync(&q_vector->rx.dim.work);
+ }
+}
+
+/**
+ * ice_down - Shutdown the connection
+ * @vsi: The VSI being stopped
+ *
+ * Caller of this function is expected to set the ICE_VSI_DOWN bit in vsi->state
+ */
+int ice_down(struct ice_vsi *vsi)
+{
+ int i, tx_err, rx_err, vlan_err = 0;
+
+ WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state));
+
+ if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+ vlan_err = ice_vsi_del_vlan_zero(vsi);
+ if (!ice_is_e810(&vsi->back->hw))
+ ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
+ netif_carrier_off(vsi->netdev);
+ netif_tx_disable(vsi->netdev);
+ } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
+ ice_eswitch_stop_all_tx_queues(vsi->back);
+ }
+
+ ice_vsi_dis_irq(vsi);
+
+ tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+ if (tx_err)
+ netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n",
+ vsi->vsi_num, tx_err);
+ if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
+ tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
+ if (tx_err)
+ netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n",
+ vsi->vsi_num, tx_err);
+ }
+
+ rx_err = ice_vsi_stop_all_rx_rings(vsi);
+ if (rx_err)
+ netdev_err(vsi->netdev, "Failed stop Rx rings, VSI %d error %d\n",
+ vsi->vsi_num, rx_err);
+
+ ice_napi_disable_all(vsi);
+
+ ice_for_each_txq(vsi, i)
+ ice_clean_tx_ring(vsi->tx_rings[i]);
+
+ if (ice_is_xdp_ena_vsi(vsi))
+ ice_for_each_xdp_txq(vsi, i)
+ ice_clean_tx_ring(vsi->xdp_rings[i]);
+
+ ice_for_each_rxq(vsi, i)
+ ice_clean_rx_ring(vsi->rx_rings[i]);
+
+ if (tx_err || rx_err || vlan_err) {
+ netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
+ vsi->vsi_num, vsi->vsw->sw_id);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_down_up - shutdown the VSI connection and bring it up
+ * @vsi: the VSI to be reconnected
+ */
+int ice_down_up(struct ice_vsi *vsi)
+{
+ int ret;
+
+ /* if DOWN already set, nothing to do */
+ if (test_and_set_bit(ICE_VSI_DOWN, vsi->state))
+ return 0;
+
+ ret = ice_down(vsi);
+ if (ret)
+ return ret;
+
+ ret = ice_up(vsi);
+ if (ret) {
+ netdev_err(vsi->netdev, "reallocating resources failed during netdev features change, may need to reload driver\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
+ * @vsi: VSI having resources allocated
+ *
+ * Return 0 on success, negative on failure
+ */
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
+{
+ int i, err = 0;
+
+ if (!vsi->num_txq) {
+ dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n",
+ vsi->vsi_num);
+ return -EINVAL;
+ }
+
+ ice_for_each_txq(vsi, i) {
+ struct ice_tx_ring *ring = vsi->tx_rings[i];
+
+ if (!ring)
+ return -EINVAL;
+
+ if (vsi->netdev)
+ ring->netdev = vsi->netdev;
+ err = ice_setup_tx_ring(ring);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
+ * @vsi: VSI having resources allocated
+ *
+ * Return 0 on success, negative on failure
+ */
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
+{
+ int i, err = 0;
+
+ if (!vsi->num_rxq) {
+ dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n",
+ vsi->vsi_num);
+ return -EINVAL;
+ }
+
+ ice_for_each_rxq(vsi, i) {
+ struct ice_rx_ring *ring = vsi->rx_rings[i];
+
+ if (!ring)
+ return -EINVAL;
+
+ if (vsi->netdev)
+ ring->netdev = vsi->netdev;
+ err = ice_setup_rx_ring(ring);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * ice_vsi_open_ctrl - open control VSI for use
+ * @vsi: the VSI to open
+ *
+ * Initialization of the Control VSI
+ *
+ * Returns 0 on success, negative value on error
+ */
+int ice_vsi_open_ctrl(struct ice_vsi *vsi)
+{
+ char int_name[ICE_INT_NAME_STR_LEN];
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ int err;
+
+ dev = ice_pf_to_dev(pf);
+ /* allocate descriptors */
+ err = ice_vsi_setup_tx_rings(vsi);
+ if (err)
+ goto err_setup_tx;
+
+ err = ice_vsi_setup_rx_rings(vsi);
+ if (err)
+ goto err_setup_rx;
+
+ err = ice_vsi_cfg(vsi);
+ if (err)
+ goto err_setup_rx;
+
+ snprintf(int_name, sizeof(int_name) - 1, "%s-%s:ctrl",
+ dev_driver_string(dev), dev_name(dev));
+ err = ice_vsi_req_irq_msix(vsi, int_name);
+ if (err)
+ goto err_setup_rx;
+
+ ice_vsi_cfg_msix(vsi);
+
+ err = ice_vsi_start_all_rx_rings(vsi);
+ if (err)
+ goto err_up_complete;
+
+ clear_bit(ICE_VSI_DOWN, vsi->state);
+ ice_vsi_ena_irq(vsi);
+
+ return 0;
+
+err_up_complete:
+ ice_down(vsi);
+err_setup_rx:
+ ice_vsi_free_rx_rings(vsi);
+err_setup_tx:
+ ice_vsi_free_tx_rings(vsi);
+
+ return err;
+}
+
+/**
+ * ice_vsi_open - Called when a network interface is made active
+ * @vsi: the VSI to open
+ *
+ * Initialization of the VSI
+ *
+ * Returns 0 on success, negative value on error
+ */
+int ice_vsi_open(struct ice_vsi *vsi)
+{
+ char int_name[ICE_INT_NAME_STR_LEN];
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+ /* allocate descriptors */
+ err = ice_vsi_setup_tx_rings(vsi);
+ if (err)
+ goto err_setup_tx;
+
+ err = ice_vsi_setup_rx_rings(vsi);
+ if (err)
+ goto err_setup_rx;
+
+ err = ice_vsi_cfg(vsi);
+ if (err)
+ goto err_setup_rx;
+
+ snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
+ dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name);
+ err = ice_vsi_req_irq_msix(vsi, int_name);
+ if (err)
+ goto err_setup_rx;
+
+ ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
+
+ if (vsi->type == ICE_VSI_PF) {
+ /* Notify the stack of the actual queue counts. */
+ err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq);
+ if (err)
+ goto err_set_qs;
+
+ err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq);
+ if (err)
+ goto err_set_qs;
+ }
+
+ err = ice_up_complete(vsi);
+ if (err)
+ goto err_up_complete;
+
+ return 0;
+
+err_up_complete:
+ ice_down(vsi);
+err_set_qs:
+ ice_vsi_free_irq(vsi);
+err_setup_rx:
+ ice_vsi_free_rx_rings(vsi);
+err_setup_tx:
+ ice_vsi_free_tx_rings(vsi);
+
+ return err;
+}
+
+/**
+ * ice_vsi_release_all - Delete all VSIs
+ * @pf: PF from which all VSIs are being removed
+ */
+static void ice_vsi_release_all(struct ice_pf *pf)
+{
+ int err, i;
+
+ if (!pf->vsi)
+ return;
+
+ ice_for_each_vsi(pf, i) {
+ if (!pf->vsi[i])
+ continue;
+
+ if (pf->vsi[i]->type == ICE_VSI_CHNL)
+ continue;
+
+ err = ice_vsi_release(pf->vsi[i]);
+ if (err)
+ dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
+ i, err, pf->vsi[i]->vsi_num);
+ }
+}
+
+/**
+ * ice_vsi_rebuild_by_type - Rebuild VSI of a given type
+ * @pf: pointer to the PF instance
+ * @type: VSI type to rebuild
+ *
+ * Iterates through the pf->vsi array and rebuilds VSIs of the requested type
+ */
+static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ int i, err;
+
+ ice_for_each_vsi(pf, i) {
+ struct ice_vsi *vsi = pf->vsi[i];
+
+ if (!vsi || vsi->type != type)
+ continue;
+
+ /* rebuild the VSI */
+ err = ice_vsi_rebuild(vsi, true);
+ if (err) {
+ dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
+ err, vsi->idx, ice_vsi_type_str(type));
+ return err;
+ }
+
+ /* replay filters for the VSI */
+ err = ice_replay_vsi(&pf->hw, vsi->idx);
+ if (err) {
+ dev_err(dev, "replay VSI failed, error %d, VSI index %d, type %s\n",
+ err, vsi->idx, ice_vsi_type_str(type));
+ return err;
+ }
+
+ /* Re-map HW VSI number, using VSI handle that has been
+ * previously validated in ice_replay_vsi() call above
+ */
+ vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+
+ /* enable the VSI */
+ err = ice_ena_vsi(vsi, false);
+ if (err) {
+ dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n",
+ err, vsi->idx, ice_vsi_type_str(type));
+ return err;
+ }
+
+ dev_info(dev, "VSI rebuilt. VSI index %d, type %s\n", vsi->idx,
+ ice_vsi_type_str(type));
+ }
+
+ return 0;
+}
+
+/**
+ * ice_update_pf_netdev_link - Update PF netdev link status
+ * @pf: pointer to the PF instance
+ */
+static void ice_update_pf_netdev_link(struct ice_pf *pf)
+{
+ bool link_up;
+ int i;
+
+ ice_for_each_vsi(pf, i) {
+ struct ice_vsi *vsi = pf->vsi[i];
+
+ if (!vsi || vsi->type != ICE_VSI_PF)
+ return;
+
+ ice_get_link_status(pf->vsi[i]->port_info, &link_up);
+ if (link_up) {
+ netif_carrier_on(pf->vsi[i]->netdev);
+ netif_tx_wake_all_queues(pf->vsi[i]->netdev);
+ } else {
+ netif_carrier_off(pf->vsi[i]->netdev);
+ netif_tx_stop_all_queues(pf->vsi[i]->netdev);
+ }
+ }
+}
+
+/**
+ * ice_rebuild - rebuild after reset
+ * @pf: PF to rebuild
+ * @reset_type: type of reset
+ *
+ * Do not rebuild VF VSIs in this flow because that is already handled via
+ * ice_reset_all_vfs(). This is because the requirements for resetting a VF
+ * after a PFR/CORER/GLOBR/etc. are different from the normal flow. Also, we
+ * don't want to reset/rebuild all the VF VSIs twice.
+ */
+static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ bool dvm;
+ int err;
+
+ if (test_bit(ICE_DOWN, pf->state))
+ goto clear_recovery;
+
+ dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);
+
+#define ICE_EMP_RESET_SLEEP_MS 5000
+ if (reset_type == ICE_RESET_EMPR) {
+ /* If an EMP reset has occurred, any previously pending flash
+ * update will have completed. We no longer know whether or
+ * not the NVM update EMP reset is restricted.
+ */
+ pf->fw_emp_reset_disabled = false;
+
+ msleep(ICE_EMP_RESET_SLEEP_MS);
+ }
+
+ err = ice_init_all_ctrlq(hw);
+ if (err) {
+ dev_err(dev, "control queues init failed %d\n", err);
+ goto err_init_ctrlq;
+ }
+
+ /* if DDP was previously loaded successfully */
+ if (!ice_is_safe_mode(pf)) {
+ /* reload the SW DB of filter tables */
+ if (reset_type == ICE_RESET_PFR)
+ ice_fill_blk_tbls(hw);
+ else
+ /* Reload DDP Package after CORER/GLOBR reset */
+ ice_load_pkg(NULL, pf);
+ }
+
+ err = ice_clear_pf_cfg(hw);
+ if (err) {
+ dev_err(dev, "clear PF configuration failed %d\n", err);
+ goto err_init_ctrlq;
+ }
+
+ ice_clear_pxe_mode(hw);
+
+ err = ice_init_nvm(hw);
+ if (err) {
+ dev_err(dev, "ice_init_nvm failed %d\n", err);
+ goto err_init_ctrlq;
+ }
+
+ err = ice_get_caps(hw);
+ if (err) {
+ dev_err(dev, "ice_get_caps failed %d\n", err);
+ goto err_init_ctrlq;
+ }
+
+ err = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
+ if (err) {
+ dev_err(dev, "set_mac_cfg failed %d\n", err);
+ goto err_init_ctrlq;
+ }
+
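+ /* re-apply the port's single vs. double VLAN mode after the reset */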
+ dvm = ice_is_dvm_ena(hw);
+
+ err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
+ if (err)
+ goto err_init_ctrlq;
+
+ err = ice_sched_init_port(hw->port_info);
+ if (err)
+ goto err_sched_init_port;
+
+ /* start misc vector */
+ err = ice_req_irq_msix_misc(pf);
+ if (err) {
+ dev_err(dev, "misc vector setup failed: %d\n", err);
+ goto err_sched_init_port;
+ }
+
+ if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
+ wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
+ if (!rd32(hw, PFQF_FD_SIZE)) {
+ u16 unused, guar, b_effort;
+
+ guar = hw->func_caps.fd_fltr_guar;
+ b_effort = hw->func_caps.fd_fltr_best_effort;
+
+ /* force guaranteed filter pool for PF */
+ ice_alloc_fd_guar_item(hw, &unused, guar);
+ /* force shared filter pool for PF */
+ ice_alloc_fd_shrd_item(hw, &unused, b_effort);
+ }
+ }
+
+ if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+ ice_dcb_rebuild(pf);
+
+ /* If the PF previously had PTP enabled, PTP init needs to happen before
+ * the VSI rebuild. If not, this causes the PTP link status events to
+ * fail.
+ */
+ if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+ ice_ptp_reset(pf);
+
+ if (ice_is_feature_supported(pf, ICE_F_GNSS))
+ ice_gnss_init(pf);
+
+ /* rebuild PF VSI */
+ err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF);
+ if (err) {
+ dev_err(dev, "PF VSI rebuild failed: %d\n", err);
+ goto err_vsi_rebuild;
+ }
+
+ /* configure PTP timestamping after VSI rebuild */
+ if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+ ice_ptp_cfg_timestamp(pf, false);
+
+ err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL);
+ if (err) {
+ dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err);
+ goto err_vsi_rebuild;
+ }
+
+ if (reset_type == ICE_RESET_PFR) {
+ err = ice_rebuild_channels(pf);
+ if (err) {
+ dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n",
+ err);
+ goto err_vsi_rebuild;
+ }
+ }
+
+ /* If Flow Director is active */
+ if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
+ err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL);
+ if (err) {
+ dev_err(dev, "control VSI rebuild failed: %d\n", err);
+ goto err_vsi_rebuild;
+ }
+
+ /* replay HW Flow Director recipes */
+ if (hw->fdir_prof)
+ ice_fdir_replay_flows(hw);
+
+ /* replay Flow Director filters */
+ ice_fdir_replay_fltrs(pf);
+
+ ice_rebuild_arfs(pf);
+ }
+
+ ice_update_pf_netdev_link(pf);
+
+ /* tell the firmware we are up */
+ err = ice_send_version(pf);
+ if (err) {
+ dev_err(dev, "Rebuild failed due to error sending driver version: %d\n",
+ err);
+ goto err_vsi_rebuild;
+ }
+
+ ice_replay_post(hw);
+
+ /* if we get here, reset flow is successful */
+ clear_bit(ICE_RESET_FAILED, pf->state);
+
+ ice_plug_aux_dev(pf);
+ return;
+
+err_vsi_rebuild:
+err_sched_init_port:
+ ice_sched_cleanup_all(hw);
+err_init_ctrlq:
+ ice_shutdown_all_ctrlq(hw);
+ set_bit(ICE_RESET_FAILED, pf->state);
+clear_recovery:
+ /* set this bit in PF state to control service task scheduling */
+ set_bit(ICE_NEEDS_RESTART, pf->state);
+ dev_err(dev, "Rebuild failed, unload and reload driver\n");
+}
+
+/**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
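+ /* with 4K pages and the default Rx path, 3K buffers leave enough of
+ * the page for XDP headroom and tailroom, so the full 3072 bytes are
+ * usable; legacy-rx or larger pages use 2K buffers, from which the
+ * XDP packet headroom must be subtracted
+ */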
+ if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+ return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
+ else
+ return ICE_RXBUF_3072;
+}
+
+/**
+ * ice_change_mtu - NDO callback to change the MTU
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ u8 count = 0;
+ int err = 0;
+
+ if (new_mtu == (int)netdev->mtu) {
+ netdev_warn(netdev, "MTU is already %u\n", netdev->mtu);
+ return 0;
+ }
+
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ int frame_size = ice_max_xdp_frame_size(vsi);
+
+ if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
+ netdev_err(netdev, "max MTU for XDP usage is %d\n",
+ frame_size - ICE_ETH_PKT_HDR_PAD);
+ return -EINVAL;
+ }
+ }
+
+ /* if a reset is in progress, wait for some time for it to complete */
+ do {
+ if (ice_is_reset_in_progress(pf->state)) {
+ count++;
+ usleep_range(1000, 2000);
+ } else {
+ break;
+ }
+
+ } while (count < 100);
+
+ if (count == 100) {
+ netdev_err(netdev, "can't change MTU. Device is busy\n");
+ return -EBUSY;
+ }
+
+ netdev->mtu = (unsigned int)new_mtu;
+
+ /* if VSI is up, bring it down and then back up */
+ if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
+ err = ice_down(vsi);
+ if (err) {
+ netdev_err(netdev, "change MTU if_down err %d\n", err);
+ return err;
+ }
+
+ err = ice_up(vsi);
+ if (err) {
+ netdev_err(netdev, "change MTU if_up err %d\n", err);
+ return err;
+ }
+ }
+
+ netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
+ set_bit(ICE_FLAG_MTU_CHANGED, pf->flags);
+
+ return err;
+}
+
+/**
+ * ice_eth_ioctl - Access the hwtstamp interface
+ * @netdev: network interface device structure
+ * @ifr: interface request data
+ * @cmd: ioctl command
+ */
+static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ return ice_ptp_get_ts_config(pf, ifr);
+ case SIOCSHWTSTAMP:
+ return ice_ptp_set_ts_config(pf, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * ice_aq_str - convert AQ err code to a string
+ * @aq_err: the AQ error code to convert
+ */
+const char *ice_aq_str(enum ice_aq_err aq_err)
+{
+ switch (aq_err) {
+ case ICE_AQ_RC_OK:
+ return "OK";
+ case ICE_AQ_RC_EPERM:
+ return "ICE_AQ_RC_EPERM";
+ case ICE_AQ_RC_ENOENT:
+ return "ICE_AQ_RC_ENOENT";
+ case ICE_AQ_RC_ENOMEM:
+ return "ICE_AQ_RC_ENOMEM";
+ case ICE_AQ_RC_EBUSY:
+ return "ICE_AQ_RC_EBUSY";
+ case ICE_AQ_RC_EEXIST:
+ return "ICE_AQ_RC_EEXIST";
+ case ICE_AQ_RC_EINVAL:
+ return "ICE_AQ_RC_EINVAL";
+ case ICE_AQ_RC_ENOSPC:
+ return "ICE_AQ_RC_ENOSPC";
+ case ICE_AQ_RC_ENOSYS:
+ return "ICE_AQ_RC_ENOSYS";
+ case ICE_AQ_RC_EMODE:
+ return "ICE_AQ_RC_EMODE";
+ case ICE_AQ_RC_ENOSEC:
+ return "ICE_AQ_RC_ENOSEC";
+ case ICE_AQ_RC_EBADSIG:
+ return "ICE_AQ_RC_EBADSIG";
+ case ICE_AQ_RC_ESVN:
+ return "ICE_AQ_RC_ESVN";
+ case ICE_AQ_RC_EBADMAN:
+ return "ICE_AQ_RC_EBADMAN";
+ case ICE_AQ_RC_EBADBUF:
+ return "ICE_AQ_RC_EBADBUF";
+ }
+
+ return "ICE_AQ_RC_UNKNOWN";
+}
+
+/**
+ * ice_set_rss_lut - Set RSS LUT
+ * @vsi: Pointer to VSI structure
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
+{
+ struct ice_aq_get_set_rss_lut_params params = {};
+ struct ice_hw *hw = &vsi->back->hw;
+ int status;
+
+ if (!lut)
+ return -EINVAL;
+
+ params.vsi_handle = vsi->idx;
+ params.lut_size = lut_size;
+ params.lut_type = vsi->rss_lut_type;
+ params.lut = lut;
+
+ status = ice_aq_set_rss_lut(hw, &params);
+ if (status)
+ dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %d aq_err %s\n",
+ status, ice_aq_str(hw->adminq.sq_last_status));
+
+ return status;
+}
+
+/**
+ * ice_set_rss_key - Set RSS key
+ * @vsi: Pointer to the VSI structure
+ * @seed: RSS hash seed
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ int status;
+
+ if (!seed)
+ return -EINVAL;
+
+ status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
+ if (status)
+ dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %d aq_err %s\n",
+ status, ice_aq_str(hw->adminq.sq_last_status));
+
+ return status;
+}
+
+/**
+ * ice_get_rss_lut - Get RSS LUT
+ * @vsi: Pointer to VSI structure
+ * @lut: Buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
+{
+ struct ice_aq_get_set_rss_lut_params params = {};
+ struct ice_hw *hw = &vsi->back->hw;
+ int status;
+
+ if (!lut)
+ return -EINVAL;
+
+ params.vsi_handle = vsi->idx;
+ params.lut_size = lut_size;
+ params.lut_type = vsi->rss_lut_type;
+ params.lut = lut;
+
+ status = ice_aq_get_rss_lut(hw, &params);
+ if (status)
+ dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %d aq_err %s\n",
+ status, ice_aq_str(hw->adminq.sq_last_status));
+
+ return status;
+}
+
+/**
+ * ice_get_rss_key - Get RSS key
+ * @vsi: Pointer to VSI structure
+ * @seed: Buffer to store the key in
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ int status;
+
+ if (!seed)
+ return -EINVAL;
+
+ status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
+ if (status)
+ dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %d aq_err %s\n",
+ status, ice_aq_str(hw->adminq.sq_last_status));
+
+ return status;
+}
+
+/**
+ * ice_bridge_getlink - Get the hardware bridge mode
+ * @skb: skb buff
+ * @pid: process ID
+ * @seq: RTNL message seq
+ * @dev: the netdev being configured
+ * @filter_mask: filter mask passed in
+ * @nlflags: netlink flags passed in
+ *
+ * Return the bridge mode (VEB/VEPA)
+ */
+static int
+ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev, u32 filter_mask, int nlflags)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ u16 bmode;
+
+ bmode = pf->first_sw->bridge_mode;
+
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bmode, 0, 0, nlflags,
+ filter_mask, NULL);
+}
+
+/**
+ * ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
+ * @vsi: Pointer to VSI structure
+ * @bmode: Hardware bridge mode (VEB/VEPA)
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
+{
+ struct ice_aqc_vsi_props *vsi_props;
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ int ret;
+
+ vsi_props = &vsi->info;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info = vsi->info;
+
+ if (bmode == BRIDGE_MODE_VEB)
+ /* change from VEPA to VEB mode */
+ ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+ else
+ /* change from VEB to VEPA mode */
+ ctxt->info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+ ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
+
+ ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (ret) {
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %s\n",
+ bmode, ret, ice_aq_str(hw->adminq.sq_last_status));
+ goto out;
+ }
+ /* Update sw flags for book keeping */
+ vsi_props->sw_flags = ctxt->info.sw_flags;
+
+out:
+ kfree(ctxt);
+ return ret;
+}
+
+/**
+ * ice_bridge_setlink - Set the hardware bridge mode
+ * @dev: the netdev being configured
+ * @nlh: RTNL message
+ * @flags: bridge setlink flags
+ * @extack: netlink extended ack
+ *
+ * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
+ * hooked up. Iterates through the PF VSI list and sets the loopback mode (if
+ * not already set) for all VSIs connected to this switch, and also updates the
+ * unicast switch filter rules for the corresponding switch of the netdev.
+ */
+static int
+ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
+ u16 __always_unused flags,
+ struct netlink_ext_ack __always_unused *extack)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_pf *pf = np->vsi->back;
+ struct nlattr *attr, *br_spec;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_sw *pf_sw;
+ int rem, v, err = 0;
+
+ pf_sw = pf->first_sw;
+ /* find the attribute in the netlink message */
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+
+ nla_for_each_nested(attr, br_spec, rem) {
+ __u16 mode;
+
+ if (nla_type(attr) != IFLA_BRIDGE_MODE)
+ continue;
+ mode = nla_get_u16(attr);
+ if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
+ return -EINVAL;
+ /* Continue if bridge mode is not being flipped */
+ if (mode == pf_sw->bridge_mode)
+ continue;
+ /* Iterate through the PF VSI list and update the loopback
+ * mode of each VSI
+ */
+ ice_for_each_vsi(pf, v) {
+ if (!pf->vsi[v])
+ continue;
+ err = ice_vsi_update_bridge_mode(pf->vsi[v], mode);
+ if (err)
+ return err;
+ }
+
+ hw->evb_veb = (mode == BRIDGE_MODE_VEB);
+ /* Update the unicast switch filter rules for the corresponding
+ * switch of the netdev
+ */
+ err = ice_update_sw_rule_bridge_mode(hw);
+ if (err) {
+ netdev_err(dev, "switch rule update failed, mode = %d err %d aq_err %s\n",
+ mode, err,
+ ice_aq_str(hw->adminq.sq_last_status));
+ /* revert hw->evb_veb */
+ hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB);
+ return err;
+ }
+
+ pf_sw->bridge_mode = mode;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: Tx queue
+ */
+static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_tx_ring *tx_ring = NULL;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ u32 i;
+
+ pf->tx_timeout_count++;
+
+ /* Check if PFC is enabled for the TC to which the queue belongs.
+ * If yes, then the Tx timeout is not caused by a hung queue; no
+ * need to reset and rebuild
+ */
+ if (ice_is_pfc_causing_hung_q(pf, txqueue)) {
+ dev_info(ice_pf_to_dev(pf), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n",
+ txqueue);
+ return;
+ }
+
+ /* now that we have an index, find the tx_ring struct */
+ ice_for_each_txq(vsi, i)
+ if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+ if (txqueue == vsi->tx_rings[i]->q_index) {
+ tx_ring = vsi->tx_rings[i];
+ break;
+ }
+
+ /* Reset recovery level if enough time has elapsed after last timeout.
+ * Also ensure no new reset action happens before next timeout period.
+ */
+ if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * 20)))
+ pf->tx_timeout_recovery_level = 1;
+ else if (time_before(jiffies, (pf->tx_timeout_last_recovery +
+ netdev->watchdog_timeo)))
+ return;
+
+ if (tx_ring) {
+ struct ice_hw *hw = &pf->hw;
+ u32 head, val = 0;
+
+ head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) &
+ QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S;
+ /* Read interrupt register */
+ val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx));
+
+ netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
+ vsi->vsi_num, txqueue, tx_ring->next_to_clean,
+ head, tx_ring->next_to_use, val);
+ }
+
+ pf->tx_timeout_last_recovery = jiffies;
+ netdev_info(netdev, "tx_timeout recovery level %d, txqueue %u\n",
+ pf->tx_timeout_recovery_level, txqueue);
+
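+ /* escalate the recovery action on each successive timeout within the
+ * window: PF reset first, then core reset, then global reset
+ */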
+ switch (pf->tx_timeout_recovery_level) {
+ case 1:
+ set_bit(ICE_PFR_REQ, pf->state);
+ break;
+ case 2:
+ set_bit(ICE_CORER_REQ, pf->state);
+ break;
+ case 3:
+ set_bit(ICE_GLOBR_REQ, pf->state);
+ break;
+ default:
+ netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
+ set_bit(ICE_DOWN, pf->state);
+ set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
+ set_bit(ICE_SERVICE_DIS, pf->state);
+ break;
+ }
+
+ ice_service_task_schedule(pf);
+ pf->tx_timeout_recovery_level++;
+}
+
+/**
+ * ice_setup_tc_cls_flower - flower classifier offloads
+ * @np: net device to configure
+ * @filter_dev: device on which filter is added
+ * @cls_flower: offload data
+ */
+static int
+ice_setup_tc_cls_flower(struct ice_netdev_priv *np,
+ struct net_device *filter_dev,
+ struct flow_cls_offload *cls_flower)
+{
+ struct ice_vsi *vsi = np->vsi;
+
+ if (cls_flower->common.chain_index)
+ return -EOPNOTSUPP;
+
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return ice_add_cls_flower(filter_dev, vsi, cls_flower);
+ case FLOW_CLS_DESTROY:
+ return ice_del_cls_flower(vsi, cls_flower);
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * ice_setup_tc_block_cb - callback handler registered for TC block
+ * @type: TC SETUP type
+ * @type_data: TC flower offload data that contains user input
+ * @cb_priv: netdev private data
+ */
+static int
+ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+ struct ice_netdev_priv *np = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return ice_setup_tc_cls_flower(np, np->vsi->netdev,
+ type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * ice_validate_mqprio_qopt - Validate TCF input parameters
+ * @vsi: Pointer to VSI
+ * @mqprio_qopt: input parameters for mqprio queue configuration
+ *
+ * This function validates MQPRIO params, such as qcount (power of 2 wherever
+ * needed), and makes sure the user doesn't specify qcount or BW rate limits
+ * for more TCs than "num_tc"
+ */
+static int
+ice_validate_mqprio_qopt(struct ice_vsi *vsi,
+ struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+ int non_power_of_2_qcount = 0;
+ struct ice_pf *pf = vsi->back;
+ int max_rss_q_cnt = 0;
+ u64 sum_min_rate = 0;
+ struct device *dev;
+ int i, speed;
+ u8 num_tc;
+
+ if (vsi->type != ICE_VSI_PF)
+ return -EINVAL;
+
+ if (mqprio_qopt->qopt.offset[0] != 0 ||
+ mqprio_qopt->qopt.num_tc < 1 ||
+ mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC)
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(pf);
+ vsi->ch_rss_size = 0;
+ num_tc = mqprio_qopt->qopt.num_tc;
+ speed = ice_get_link_speed_kbps(vsi);
+
+ for (i = 0; num_tc; i++) {
+ int qcount = mqprio_qopt->qopt.count[i];
+ u64 max_rate, min_rate, rem;
+
+ if (!qcount)
+ return -EINVAL;
+
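+ /* RSS spreads over a power-of-2 number of queues; only one distinct
+ * non-power-of-2 qcount is allowed and it must be the largest, so an
+ * RSS LUT sized from max_rss_q_cnt still covers every queue
+ */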
+ if (is_power_of_2(qcount)) {
+ if (non_power_of_2_qcount &&
+ qcount > non_power_of_2_qcount) {
+ dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n",
+ qcount, non_power_of_2_qcount);
+ return -EINVAL;
+ }
+ if (qcount > max_rss_q_cnt)
+ max_rss_q_cnt = qcount;
+ } else {
+ if (non_power_of_2_qcount &&
+ qcount != non_power_of_2_qcount) {
+ dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n",
+ qcount, non_power_of_2_qcount);
+ return -EINVAL;
+ }
+ if (qcount < max_rss_q_cnt) {
+ dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n",
+ qcount, max_rss_q_cnt);
+ return -EINVAL;
+ }
+ max_rss_q_cnt = qcount;
+ non_power_of_2_qcount = qcount;
+ }
+
+ /* The TC command takes input in K/M/Gbps or K/M/Gbit etc., but
+ * converts the bandwidth rate limit into Bytes/s when
+ * passing it down to the driver. So convert input bandwidth
+ * from Bytes/s to Kbps
+ */
+ max_rate = mqprio_qopt->max_rate[i];
+ max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR);
+
+ /* min_rate is the minimum guaranteed rate; if non-zero it must meet ICE_MIN_BW_LIMIT */
+ min_rate = mqprio_qopt->min_rate[i];
+ min_rate = div_u64(min_rate, ICE_BW_KBPS_DIVISOR);
+ sum_min_rate += min_rate;
+
+ if (min_rate && min_rate < ICE_MIN_BW_LIMIT) {
+ dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i,
+ min_rate, ICE_MIN_BW_LIMIT);
+ return -EINVAL;
+ }
+
+ if (max_rate && max_rate > speed) {
+ dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n",
+ i, max_rate, speed);
+ return -EINVAL;
+ }
+
+ iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem);
+ if (rem) {
+ dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
+ i, ICE_MIN_BW_LIMIT);
+ return -EINVAL;
+ }
+
+ iter_div_u64_rem(max_rate, ICE_MIN_BW_LIMIT, &rem);
+ if (rem) {
+ dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps",
+ i, ICE_MIN_BW_LIMIT);
+ return -EINVAL;
+ }
+
+ /* min_rate can't be more than max_rate, except when max_rate
+ * is zero (implies max_rate sought is max line rate). In such
+ * a case min_rate can be more than max.
+ */
+ if (max_rate && min_rate > max_rate) {
+ dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n",
+ min_rate, max_rate);
+ return -EINVAL;
+ }
+
+ if (i >= mqprio_qopt->qopt.num_tc - 1)
+ break;
+ if (mqprio_qopt->qopt.offset[i + 1] !=
+ (mqprio_qopt->qopt.offset[i] + qcount))
+ return -EINVAL;
+ }
+ if (vsi->num_rxq <
+ (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+ return -EINVAL;
+ if (vsi->num_txq <
+ (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+ return -EINVAL;
+
+ if (sum_min_rate && sum_min_rate > (u64)speed) {
+ dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
+ sum_min_rate, speed);
+ return -EINVAL;
+ }
+
+ /* make sure vsi->ch_rss_size is set correctly based on TC's qcount */
+ vsi->ch_rss_size = max_rss_q_cnt;
+
+ return 0;
+}
+
+/**
+ * ice_add_vsi_to_fdir - add a VSI to the flow director group for PF
+ * @pf: ptr to PF device
+ * @vsi: ptr to VSI
+ */
+static int ice_add_vsi_to_fdir(struct ice_pf *pf, struct ice_vsi *vsi)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ bool added = false;
+ struct ice_hw *hw;
+ int flow;
+
+ if (!(vsi->num_gfltr || vsi->num_bfltr))
+ return -EINVAL;
+
+ hw = &pf->hw;
+ for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
+ struct ice_fd_hw_prof *prof;
+ int tun, status;
+ u64 entry_h;
+
+ if (!(hw->fdir_prof && hw->fdir_prof[flow] &&
+ hw->fdir_prof[flow]->cnt))
+ continue;
+
+ for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+ enum ice_flow_priority prio;
+ u64 prof_id;
+
+ /* add this VSI to FDir profile for this flow */
+ prio = ICE_FLOW_PRIO_NORMAL;
+ prof = hw->fdir_prof[flow];
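+ /* tunnel and non-tunnel segments get distinct profile IDs by
+ * offsetting the tunnel copies by the number of flow types
+ */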
+ prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+ status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id,
+ prof->vsi_h[0], vsi->idx,
+ prio, prof->fdir_seg[tun],
+ &entry_h);
+ if (status) {
+ dev_err(dev, "channel VSI idx %d, not able to add to group %d\n",
+ vsi->idx, flow);
+ continue;
+ }
+
+ prof->entry_h[prof->cnt][tun] = entry_h;
+ }
+
+ /* store VSI for filter replay and delete */
+ prof->vsi_h[prof->cnt] = vsi->idx;
+ prof->cnt++;
+
+ added = true;
+ dev_dbg(dev, "VSI idx %d added to fdir group %d\n", vsi->idx,
+ flow);
+ }
+
+ if (!added)
+ dev_dbg(dev, "VSI idx %d not added to fdir groups\n", vsi->idx);
+
+ return 0;
+}
+
+/**
+ * ice_add_channel - add a channel by adding VSI
+ * @pf: ptr to PF device
+ * @sw_id: underlying HW switching element ID
+ * @ch: ptr to channel structure
+ *
+ * Add a channel (VSI) using add_vsi and queue_map
+ */
+static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_vsi *vsi;
+
+ if (ch->type != ICE_VSI_CHNL) {
+ dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type);
+ return -EINVAL;
+ }
+
+ vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch);
+ if (!vsi || vsi->type != ICE_VSI_CHNL) {
+ dev_err(dev, "create chnl VSI failure\n");
+ return -EINVAL;
+ }
+
+ ice_add_vsi_to_fdir(pf, vsi);
+
+ ch->sw_id = sw_id;
+ ch->vsi_num = vsi->vsi_num;
+ ch->info.mapping_flags = vsi->info.mapping_flags;
+ ch->ch_vsi = vsi;
+ /* set the back pointer of channel for newly created VSI */
+ vsi->ch = ch;
+
+ memcpy(&ch->info.q_mapping, &vsi->info.q_mapping,
+ sizeof(vsi->info.q_mapping));
+ memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping,
+ sizeof(vsi->info.tc_mapping));
+
+ return 0;
+}
+
+/**
+ * ice_chnl_cfg_res - configure channel-specific resources
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ *
+ * Configure channel-specific resources such as rings and vectors.
+ */
+static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch)
+{
+ int i;
+
+ for (i = 0; i < ch->num_txq; i++) {
+ struct ice_q_vector *tx_q_vector, *rx_q_vector;
+ struct ice_ring_container *rc;
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+
+ tx_ring = vsi->tx_rings[ch->base_q + i];
+ rx_ring = vsi->rx_rings[ch->base_q + i];
+ if (!tx_ring || !rx_ring)
+ continue;
+
+ /* setup ring being channel enabled */
+ tx_ring->ch = ch;
+ rx_ring->ch = ch;
+
+ /* following code block sets up vector specific attributes */
+ tx_q_vector = tx_ring->q_vector;
+ rx_q_vector = rx_ring->q_vector;
+ if (!tx_q_vector && !rx_q_vector)
+ continue;
+
+ if (tx_q_vector) {
+ tx_q_vector->ch = ch;
+ /* setup Tx and Rx ITR setting if DIM is off */
+ rc = &tx_q_vector->tx;
+ if (!ITR_IS_DYNAMIC(rc))
+ ice_write_itr(rc, rc->itr_setting);
+ }
+ if (rx_q_vector) {
+ rx_q_vector->ch = ch;
+ /* setup Tx and Rx ITR setting if DIM is off */
+ rc = &rx_q_vector->rx;
+ if (!ITR_IS_DYNAMIC(rc))
+ ice_write_itr(rc, rc->itr_setting);
+ }
+ }
+
+ /* it is safe to assume that, if the channel has a non-zero num_txq or
+ * num_rxq, then the GLINT_ITR register has been written to perform an
+ * in-context update, hence perform a flush
+ */
+ if (ch->num_txq || ch->num_rxq)
+ ice_flush(&vsi->back->hw);
+}
+
+/**
+ * ice_cfg_chnl_all_res - configure channel resources
+ * @vsi: ptr to the main VSI
+ * @ch: ptr to channel structure
+ *
+ * This function configures channel specific resources such as flow-director
+ * counter index, and other resources such as queues, vectors, ITR settings
+ */
+static void
+ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch)
+{
+ /* configure channel (aka ADQ) resources such as queues, vectors,
+ * ITR settings for channel specific vectors and anything else
+ */
+ ice_chnl_cfg_res(vsi, ch);
+}
+
+/**
+ * ice_setup_hw_channel - setup new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ * @sw_id: underlying HW switching element ID
+ * @type: type of channel to be created (VMDq2/VF)
+ *
+ * Set up a new channel (VSI) based on the specified type
+ * and configure Tx rings accordingly
+ */
+static int
+ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi,
+ struct ice_channel *ch, u16 sw_id, u8 type)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ int ret;
+
+ ch->base_q = vsi->next_base_q;
+ ch->type = type;
+
+ ret = ice_add_channel(pf, sw_id, ch);
+ if (ret) {
+ dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id);
+ return ret;
+ }
+
+ /* configure/setup ADQ specific resources */
+ ice_cfg_chnl_all_res(vsi, ch);
+
+ /* make sure to update the next_base_q so that subsequent channels'
+ * (aka ADQ) VSI queue maps are correct
+ */
+ vsi->next_base_q = vsi->next_base_q + ch->num_rxq;
+ dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num,
+ ch->num_rxq);
+
+ return 0;
+}
+
+/**
+ * ice_setup_channel - setup new channel using uplink element
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ *
+ * Set up a new channel (VSI) of type ICE_VSI_CHNL using the PF's
+ * uplink switching element
+ */
+static bool
+ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi,
+ struct ice_channel *ch)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ u16 sw_id;
+ int ret;
+
+ if (vsi->type != ICE_VSI_PF) {
+ dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type);
+ return false;
+ }
+
+ sw_id = pf->first_sw->sw_id;
+
+ /* create channel (VSI) */
+ ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, ICE_VSI_CHNL);
+ if (ret) {
+ dev_err(dev, "failed to setup hw_channel\n");
+ return false;
+ }
+ dev_dbg(dev, "successfully created channel()\n");
+
+ return ch->ch_vsi ? true : false;
+}
+
+/**
+ * ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
+ * @vsi: VSI to be configured
+ * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit
+ * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit
+ */
+static int
+ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate)
+{
+ int err;
+
+ err = ice_set_min_bw_limit(vsi, min_tx_rate);
+ if (err)
+ return err;
+
+ return ice_set_max_bw_limit(vsi, max_tx_rate);
+}
+
+/**
+ * ice_create_q_channel - function to create channel
+ * @vsi: VSI to be configured
+ * @ch: ptr to channel (it contains channel specific params)
+ *
+ * This function creates a channel (VSI) using the num_queues specified by the
+ * user and reconfigures RSS if needed.
+ */
+static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+
+ if (!ch)
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(pf);
+ if (!ch->num_txq || !ch->num_rxq) {
+ dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq);
+ return -EINVAL;
+ }
+
+ if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) {
+ dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n",
+ vsi->cnt_q_avail, ch->num_txq);
+ return -EINVAL;
+ }
+
+ if (!ice_setup_channel(pf, vsi, ch)) {
+ dev_info(dev, "Failed to setup channel\n");
+ return -EINVAL;
+ }
+ /* configure BW rate limit */
+ if (ch->ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) {
+ int ret;
+
+ ret = ice_set_bw_limit(ch->ch_vsi, ch->max_tx_rate,
+ ch->min_tx_rate);
+ if (ret)
+ dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n",
+ ch->max_tx_rate, ch->ch_vsi->vsi_num);
+ else
+ dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n",
+ ch->max_tx_rate, ch->ch_vsi->vsi_num);
+ }
+
+ vsi->cnt_q_avail -= ch->num_txq;
+
+ return 0;
+}
+
+/**
+ * ice_rem_all_chnl_fltrs - removes all channel filters
+ * @pf: ptr to PF, TC-flower based filter are tracked at PF level
+ *
+ * Remove advanced switch filters, but only those that are channel-specific
+ * tc-flower based filters
+ */
+static void ice_rem_all_chnl_fltrs(struct ice_pf *pf)
+{
+ struct ice_tc_flower_fltr *fltr;
+ struct hlist_node *node;
+
+ /* to remove all channel filters, iterate an ordered list of filters */
+ hlist_for_each_entry_safe(fltr, node,
+ &pf->tc_flower_fltr_list,
+ tc_flower_node) {
+ struct ice_rule_query_data rule;
+ int status;
+
+ /* for now process only channel specific filters */
+ if (!ice_is_chnl_fltr(fltr))
+ continue;
+
+ rule.rid = fltr->rid;
+ rule.rule_id = fltr->rule_id;
+ rule.vsi_handle = fltr->dest_id;
+ status = ice_rem_adv_rule_by_id(&pf->hw, &rule);
+ if (status) {
+ if (status == -ENOENT)
+ dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n",
+ rule.rule_id);
+ else
+ dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n",
+ status);
+ } else if (fltr->dest_vsi) {
+ /* update advanced switch filter count */
+ if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
+ u32 flags = fltr->flags;
+
+ fltr->dest_vsi->num_chnl_fltr--;
+ if (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+ ICE_TC_FLWR_FIELD_ENC_DST_MAC))
+ pf->num_dmac_chnl_fltrs--;
+ }
+ }
+
+ hlist_del(&fltr->tc_flower_node);
+ kfree(fltr);
+ }
+}
+
+/**
+ * ice_remove_q_channels - Remove queue channels for the TCs
+ * @vsi: VSI to be configured
+ * @rem_fltr: delete advanced switch filter or not
+ *
+ * Remove queue channels for the TCs
+ */
+static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr)
+{
+ struct ice_channel *ch, *ch_tmp;
+ struct ice_pf *pf = vsi->back;
+ int i;
+
+ /* remove all tc-flower based filters if they are channel filters only */
+ if (rem_fltr)
+ ice_rem_all_chnl_fltrs(pf);
+
+ /* remove ntuple filters since queue configuration is being changed */
+ if (vsi->netdev->features & NETIF_F_NTUPLE) {
+ struct ice_hw *hw = &pf->hw;
+
+ mutex_lock(&hw->fdir_fltr_lock);
+ ice_fdir_del_all_fltrs(vsi);
+ mutex_unlock(&hw->fdir_fltr_lock);
+ }
+
+ /* perform cleanup for channels if they exist */
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+ struct ice_vsi *ch_vsi;
+
+ list_del(&ch->list);
+ ch_vsi = ch->ch_vsi;
+ if (!ch_vsi) {
+ kfree(ch);
+ continue;
+ }
+
+ /* Reset queue contexts */
+ for (i = 0; i < ch->num_rxq; i++) {
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+
+ tx_ring = vsi->tx_rings[ch->base_q + i];
+ rx_ring = vsi->rx_rings[ch->base_q + i];
+ if (tx_ring) {
+ tx_ring->ch = NULL;
+ if (tx_ring->q_vector)
+ tx_ring->q_vector->ch = NULL;
+ }
+ if (rx_ring) {
+ rx_ring->ch = NULL;
+ if (rx_ring->q_vector)
+ rx_ring->q_vector->ch = NULL;
+ }
+ }
+
+ /* Release FD resources for the channel VSI */
+ ice_fdir_rem_adq_chnl(&pf->hw, ch->ch_vsi->idx);
+
+ /* clear the VSI from scheduler tree */
+ ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx);
+
+ /* Delete VSI from FW */
+ ice_vsi_delete(ch->ch_vsi);
+
+ /* Delete VSI from PF and HW VSI arrays */
+ ice_vsi_clear(ch->ch_vsi);
+
+ /* free the channel */
+ kfree(ch);
+ }
+
+ /* clear the channel VSI map which is stored in main VSI */
+ ice_for_each_chnl_tc(i)
+ vsi->tc_map_vsi[i] = NULL;
+
+ /* reset main VSI's all TC information */
+ vsi->all_enatc = 0;
+ vsi->all_numtc = 0;
+}
+
+/**
+ * ice_rebuild_channels - rebuild channels
+ * @pf: ptr to PF
+ *
+ * Recreate channel VSIs and replay filters
+ */
+static int ice_rebuild_channels(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_vsi *main_vsi;
+ bool rem_adv_fltr = true;
+ struct ice_channel *ch;
+ struct ice_vsi *vsi;
+ int tc_idx = 1;
+ int i, err;
+
+ main_vsi = ice_get_main_vsi(pf);
+ if (!main_vsi)
+ return 0;
+
+ if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) ||
+ main_vsi->old_numtc == 1)
+ return 0; /* nothing to be done */
+
+ /* reconfigure main VSI based on old value of TC and cached values
+ * for MQPRIO opts
+ */
+ err = ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc);
+ if (err) {
+ dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n",
+ main_vsi->old_ena_tc, main_vsi->vsi_num);
+ return err;
+ }
+
+ /* rebuild ADQ VSIs */
+ ice_for_each_vsi(pf, i) {
+ enum ice_vsi_type type;
+
+ vsi = pf->vsi[i];
+ if (!vsi || vsi->type != ICE_VSI_CHNL)
+ continue;
+
+ type = vsi->type;
+
+ /* rebuild ADQ VSI */
+ err = ice_vsi_rebuild(vsi, true);
+ if (err) {
+ dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n",
+ ice_vsi_type_str(type), vsi->idx, err);
+ goto cleanup;
+ }
+
+ /* Re-map the HW VSI number, using the VSI handle that is
+ * validated in the ice_replay_vsi() call below
+ */
+ vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+
+ /* replay filters for the VSI */
+ err = ice_replay_vsi(&pf->hw, vsi->idx);
+ if (err) {
+ dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n",
+ ice_vsi_type_str(type), err, vsi->idx);
+ rem_adv_fltr = false;
+ goto cleanup;
+ }
+ dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n",
+ ice_vsi_type_str(type), vsi->idx);
+
+ /* store ADQ VSI at correct TC index in main VSI's
+ * map of TC to VSI
+ */
+ main_vsi->tc_map_vsi[tc_idx++] = vsi;
+ }
+
+ /* ADQ VSI(s) have been rebuilt successfully, so set up the
+ * channels for the main VSI's Tx and Rx rings
+ */
+ list_for_each_entry(ch, &main_vsi->ch_list, list) {
+ struct ice_vsi *ch_vsi;
+
+ ch_vsi = ch->ch_vsi;
+ if (!ch_vsi)
+ continue;
+
+ /* reconfig channel resources */
+ ice_cfg_chnl_all_res(main_vsi, ch);
+
+ /* replay BW rate limit if it is non-zero */
+ if (!ch->max_tx_rate && !ch->min_tx_rate)
+ continue;
+
+ err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate,
+ ch->min_tx_rate);
+ if (err)
+ dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
+ err, ch->max_tx_rate, ch->min_tx_rate,
+ ch_vsi->vsi_num);
+ else
+ dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
+ ch->max_tx_rate, ch->min_tx_rate,
+ ch_vsi->vsi_num);
+ }
+
+ /* reconfig RSS for main VSI */
+ if (main_vsi->ch_rss_size)
+ ice_vsi_cfg_rss_lut_key(main_vsi);
+
+ return 0;
+
+cleanup:
+ ice_remove_q_channels(main_vsi, rem_adv_fltr);
+ return err;
+}
+
+/**
+ * ice_create_q_channels - Add queue channel for the given TCs
+ * @vsi: VSI to be configured
+ *
+ * Configures queue channel mapping to the given TCs
+ */
+static int ice_create_q_channels(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_channel *ch;
+ int ret = 0, i;
+
+ ice_for_each_chnl_tc(i) {
+ if (!(vsi->all_enatc & BIT(i)))
+ continue;
+
+ ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+ if (!ch) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+ INIT_LIST_HEAD(&ch->list);
+ ch->num_rxq = vsi->mqprio_qopt.qopt.count[i];
+ ch->num_txq = vsi->mqprio_qopt.qopt.count[i];
+ ch->base_q = vsi->mqprio_qopt.qopt.offset[i];
+ ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i];
+ ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i];
+
+ /* convert to Kbits/s */
+ if (ch->max_tx_rate)
+ ch->max_tx_rate = div_u64(ch->max_tx_rate,
+ ICE_BW_KBPS_DIVISOR);
+ if (ch->min_tx_rate)
+ ch->min_tx_rate = div_u64(ch->min_tx_rate,
+ ICE_BW_KBPS_DIVISOR);
+
+ ret = ice_create_q_channel(vsi, ch);
+ if (ret) {
+ dev_err(ice_pf_to_dev(pf),
+ "failed creating channel TC:%d\n", i);
+ kfree(ch);
+ goto err_free;
+ }
+ list_add_tail(&ch->list, &vsi->ch_list);
+ vsi->tc_map_vsi[i] = ch->ch_vsi;
+ dev_dbg(ice_pf_to_dev(pf),
+ "successfully created channel: VSI %pK\n", ch->ch_vsi);
+ }
+ return 0;
+
+err_free:
+ ice_remove_q_channels(vsi, false);
+
+ return ret;
+}
+
+/**
+ * ice_setup_tc_mqprio_qdisc - configure multiple traffic classes
+ * @netdev: net device to configure
+ * @type_data: TC offload data
+ */
+static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data)
+{
+ struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ u16 mode, ena_tc_qdisc = 0;
+ int cur_txq, cur_rxq;
+ u8 hw = 0, num_tcf;
+ struct device *dev;
+ int ret, i;
+
+ dev = ice_pf_to_dev(pf);
+ num_tcf = mqprio_qopt->qopt.num_tc;
+ hw = mqprio_qopt->qopt.hw;
+ mode = mqprio_qopt->mode;
+ if (!hw) {
+ clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
+ vsi->ch_rss_size = 0;
+ memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+ goto config_tcf;
+ }
+
+ /* Generate queue region map for number of TCF requested */
+ for (i = 0; i < num_tcf; i++)
+ ena_tc_qdisc |= BIT(i);
+
+ switch (mode) {
+ case TC_MQPRIO_MODE_CHANNEL:
+
+ ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt);
+ if (ret) {
+ netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n",
+ ret);
+ return ret;
+ }
+ memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+ set_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
+ /* don't assume the state of hw_tc_offload during driver load;
+ * set the TC flower filter flag if hw_tc_offload is
+ * already ON
+ */
+ if (vsi->netdev->features & NETIF_F_HW_TC)
+ set_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+config_tcf:
+
+ /* Requesting same TCF configuration as already enabled */
+ if (ena_tc_qdisc == vsi->tc_cfg.ena_tc &&
+ mode != TC_MQPRIO_MODE_CHANNEL)
+ return 0;
+
+ /* Pause VSI queues */
+ ice_dis_vsi(vsi, true);
+
+ if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ ice_remove_q_channels(vsi, true);
+
+ if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+ vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf),
+ num_online_cpus());
+ vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf),
+ num_online_cpus());
+ } else {
+ /* logic to rebuild the VSI, same as "ethtool -L" */
+ u16 offset = 0, qcount_tx = 0, qcount_rx = 0;
+
+ for (i = 0; i < num_tcf; i++) {
+ if (!(ena_tc_qdisc & BIT(i)))
+ continue;
+
+ offset = vsi->mqprio_qopt.qopt.offset[i];
+ qcount_rx = vsi->mqprio_qopt.qopt.count[i];
+ qcount_tx = vsi->mqprio_qopt.qopt.count[i];
+ }
+ vsi->req_txq = offset + qcount_tx;
+ vsi->req_rxq = offset + qcount_rx;
+
+ /* store away the original rss_size info, so that it can be reused
+ * by ice_vsi_rebuild during the tc-qdisc delete stage to
+ * determine what the rss_size for the main VSI should be
+ */
+ vsi->orig_rss_size = vsi->rss_size;
+ }
+
+ /* save current values of Tx and Rx queues before calling VSI rebuild
+ * for fallback option
+ */
+ cur_txq = vsi->num_txq;
+ cur_rxq = vsi->num_rxq;
+
+ /* proceed with rebuilding the main VSI using the correct number of queues */
+ ret = ice_vsi_rebuild(vsi, false);
+ if (ret) {
+ /* fallback to current number of queues */
+ dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n");
+ vsi->req_txq = cur_txq;
+ vsi->req_rxq = cur_rxq;
+ clear_bit(ICE_RESET_FAILED, pf->state);
+ if (ice_vsi_rebuild(vsi, false)) {
+ dev_err(dev, "Rebuild of main VSI failed again\n");
+ return ret;
+ }
+ }
+
+ vsi->all_numtc = num_tcf;
+ vsi->all_enatc = ena_tc_qdisc;
+ ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc);
+ if (ret) {
+ netdev_err(netdev, "failed configuring TC for VSI id=%d\n",
+ vsi->vsi_num);
+ goto exit;
+ }
+
+ if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+ u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+ u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0];
+
+ /* set TC0 rate limit if specified */
+ if (max_tx_rate || min_tx_rate) {
+ /* convert to Kbits/s */
+ if (max_tx_rate)
+ max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR);
+ if (min_tx_rate)
+ min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR);
+
+ ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate);
+ if (!ret) {
+ dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n",
+ max_tx_rate, min_tx_rate, vsi->vsi_num);
+ } else {
+ dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n",
+ max_tx_rate, min_tx_rate, vsi->vsi_num);
+ goto exit;
+ }
+ }
+ ret = ice_create_q_channels(vsi);
+ if (ret) {
+ netdev_err(netdev, "failed configuring queue channels\n");
+ goto exit;
+ } else {
+ netdev_dbg(netdev, "successfully configured channels\n");
+ }
+ }
+
+ if (vsi->ch_rss_size)
+ ice_vsi_cfg_rss_lut_key(vsi);
+
+exit:
+ /* on error, reset all_numtc and all_enatc */
+ if (ret) {
+ vsi->all_numtc = 0;
+ vsi->all_enatc = 0;
+ }
+ /* resume VSI */
+ ice_ena_vsi(vsi, true);
+
+ return ret;
+}
+
+static LIST_HEAD(ice_block_cb_list);
+
+static int
+ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+ bool locked = false;
+ int err;
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return flow_block_cb_setup_simple(type_data,
+ &ice_block_cb_list,
+ ice_setup_tc_block_cb,
+ np, np, true);
+ case TC_SETUP_QDISC_MQPRIO:
+ if (ice_is_eswitch_mode_switchdev(pf)) {
+ netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n");
+ return -EOPNOTSUPP;
+ }
+
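+ /* adev_mutex stabilizes pf->adev, and the device lock keeps the
+ * auxiliary (RDMA) driver from binding while queues are reshaped
+ */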
+ if (pf->adev) {
+ mutex_lock(&pf->adev_mutex);
+ device_lock(&pf->adev->dev);
+ locked = true;
+ if (pf->adev->dev.driver) {
+ netdev_err(netdev, "Cannot change qdisc when RDMA is active\n");
+ err = -EBUSY;
+ goto adev_unlock;
+ }
+ }
+
+ /* setup traffic classifier for receive side */
+ mutex_lock(&pf->tc_mutex);
+ err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
+ mutex_unlock(&pf->tc_mutex);
+
+adev_unlock:
+ if (locked) {
+ device_unlock(&pf->adev->dev);
+ mutex_unlock(&pf->adev_mutex);
+ }
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return -EOPNOTSUPP;
+}
+
+static struct ice_indr_block_priv *
+ice_indr_block_priv_lookup(struct ice_netdev_priv *np,
+ struct net_device *netdev)
+{
+ struct ice_indr_block_priv *cb_priv;
+
+ list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) {
+ if (!cb_priv->netdev)
+ return NULL;
+ if (cb_priv->netdev == netdev)
+ return cb_priv;
+ }
+ return NULL;
+}
+
+static int
+ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data,
+ void *indr_priv)
+{
+ struct ice_indr_block_priv *priv = indr_priv;
+ struct ice_netdev_priv *np = priv->np;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return ice_setup_tc_cls_flower(np, priv->netdev,
+ (struct flow_cls_offload *)
+ type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+ice_indr_setup_tc_block(struct net_device *netdev, struct Qdisc *sch,
+ struct ice_netdev_priv *np,
+ struct flow_block_offload *f, void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb))
+{
+ struct ice_indr_block_priv *indr_priv;
+ struct flow_block_cb *block_cb;
+
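+ /* indirect TC offload is only accepted from tunnel devices or from
+ * VLAN devices stacked on this PF's own netdev
+ */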
+ if (!ice_is_tunnel_supported(netdev) &&
+ !(is_vlan_dev(netdev) &&
+ vlan_dev_real_dev(netdev) == np->vsi->netdev))
+ return -EOPNOTSUPP;
+
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case FLOW_BLOCK_BIND:
+ indr_priv = ice_indr_block_priv_lookup(np, netdev);
+ if (indr_priv)
+ return -EEXIST;
+
+ indr_priv = kzalloc(sizeof(*indr_priv), GFP_KERNEL);
+ if (!indr_priv)
+ return -ENOMEM;
+
+ indr_priv->netdev = netdev;
+ indr_priv->np = np;
+ list_add(&indr_priv->list, &np->tc_indr_block_priv_list);
+
+ block_cb =
+ flow_indr_block_cb_alloc(ice_indr_setup_block_cb,
+ indr_priv, indr_priv,
+ ice_rep_indr_tc_block_unbind,
+ f, netdev, sch, data, np,
+ cleanup);
+
+ if (IS_ERR(block_cb)) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ return PTR_ERR(block_cb);
+ }
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &ice_block_cb_list);
+ break;
+ case FLOW_BLOCK_UNBIND:
+ indr_priv = ice_indr_block_priv_lookup(np, netdev);
+ if (!indr_priv)
+ return -ENOENT;
+
+ block_cb = flow_block_cb_lookup(f->block,
+ ice_indr_setup_block_cb,
+ indr_priv);
+ if (!block_cb)
+ return -ENOENT;
+
+ flow_indr_block_cb_remove(block_cb, f);
+
+ list_del(&block_cb->driver_list);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int
+ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
+ void *cb_priv, enum tc_setup_type type, void *type_data,
+ void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb))
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return ice_indr_setup_tc_block(netdev, sch, cb_priv, type_data,
+ data, cleanup);
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * ice_open - Called when a network interface becomes active
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the netdev watchdog is enabled,
+ * and the stack is notified that the interface is ready.
+ *
+ * Returns 0 on success, negative value on failure
+ */
+int ice_open(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+ if (ice_is_reset_in_progress(pf->state)) {
+ netdev_err(netdev, "can't open net device while reset is in progress");
+ return -EBUSY;
+ }
+
+ return ice_open_internal(netdev);
+}
+
+/**
+ * ice_open_internal - Called when a network interface becomes active
+ * @netdev: network interface device structure
+ *
+ * Internal ice_open implementation. Should not be used directly except by
+ * ice_open and the reset handling routine
+ *
+ * Returns 0 on success, negative value on failure
+ */
+int ice_open_internal(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_port_info *pi;
+ int err;
+
+ if (test_bit(ICE_NEEDS_RESTART, pf->state)) {
+ netdev_err(netdev, "driver needs to be unloaded and reloaded\n");
+ return -EIO;
+ }
+
+ netif_carrier_off(netdev);
+
+ pi = vsi->port_info;
+ err = ice_update_link_info(pi);
+ if (err) {
+ netdev_err(netdev, "Failed to get link info, error %d\n", err);
+ return err;
+ }
+
+ ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
+
+ /* Set PHY if there is media, otherwise, turn off PHY */
+ if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
+ clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+ if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) {
+ err = ice_init_phy_user_cfg(pi);
+ if (err) {
+ netdev_err(netdev, "Failed to initialize PHY settings, error %d\n",
+ err);
+ return err;
+ }
+ }
+
+ err = ice_configure_phy(vsi);
+ if (err) {
+ netdev_err(netdev, "Failed to set physical link up, error %d\n",
+ err);
+ return err;
+ }
+ } else {
+ set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+ ice_set_link(vsi, false);
+ }
+
+ err = ice_vsi_open(vsi);
+ if (err)
+ netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
+ vsi->vsi_num, vsi->vsw->sw_id);
+
+ /* Update existing tunnel information */
+ udp_tunnel_get_rx_info(netdev);
+
+ return err;
+}
+
+/**
+ * ice_stop - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * The stop entry point is called when an interface is de-activated by the OS,
+ * and the netdevice enters the DOWN state. The hardware is still under the
+ * driver's control, but the netdev interface is disabled.
+ *
+ * Returns success only - not allowed to fail
+ */
+int ice_stop(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+
+ if (ice_is_reset_in_progress(pf->state)) {
+ netdev_err(netdev, "can't stop net device while reset is in progress");
+ return -EBUSY;
+ }
+
+ if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
+ int link_err = ice_force_phys_link_state(vsi, false);
+
+ if (link_err) {
+ if (link_err == -ENOMEDIUM)
+ netdev_info(vsi->netdev, "Skipping link reconfig - no media attached, VSI %d\n",
+ vsi->vsi_num);
+ else
+ netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n",
+ vsi->vsi_num, link_err);
+
+ ice_vsi_close(vsi);
+ return -EIO;
+ }
+ }
+
+ ice_vsi_close(vsi);
+
+ return 0;
+}
+
+/**
+ * ice_features_check - Validate encapsulated packet conforms to limits
+ * @skb: skb buffer
+ * @netdev: This port's netdev
+ * @features: Offload features that the stack believes apply
+ */
+static netdev_features_t
+ice_features_check(struct sk_buff *skb,
+ struct net_device __always_unused *netdev,
+ netdev_features_t features)
+{
+ bool gso = skb_is_gso(skb);
+ size_t len;
+
+ /* No point in doing any of this if neither checksum nor GSO are
+ * being requested for this frame. We can rule out both by just
+ * checking for CHECKSUM_PARTIAL
+ */
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return features;
+
+ /* We cannot support GSO if the MSS is going to be less than
+ * 64 bytes. If it is then we need to drop support for GSO.
+ */
+ if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
+ features &= ~NETIF_F_GSO_MASK;
+
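+ /* the Tx context descriptor stores MAC/IP header lengths in 2-byte
+ * units, so header lengths must be even and within the HW maxima
+ * checked below
+ */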
+ len = skb_network_offset(skb);
+ if (len > ICE_TXD_MACLEN_MAX || len & 0x1)
+ goto out_rm_features;
+
+ len = skb_network_header_len(skb);
+ if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
+ goto out_rm_features;
+
+ if (skb->encapsulation) {
+ /* this must work for VXLAN frames AND IPIP/SIT frames, and in
+ * the case of IPIP frames, the transport header pointer is
+ * after the inner header! So check to make sure that this
+ * is a GRE or UDP_TUNNEL frame before doing that math.
+ */
+ if (gso && (skb_shinfo(skb)->gso_type &
+ (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) {
+ len = skb_inner_network_header(skb) -
+ skb_transport_header(skb);
+ if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
+ goto out_rm_features;
+ }
+
+ len = skb_inner_network_header_len(skb);
+ if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
+ goto out_rm_features;
+ }
+
+ return features;
+out_rm_features:
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+static const struct net_device_ops ice_netdev_safe_mode_ops = {
+ .ndo_open = ice_open,
+ .ndo_stop = ice_stop,
+ .ndo_start_xmit = ice_start_xmit,
+ .ndo_set_mac_address = ice_set_mac_address,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ice_change_mtu,
+ .ndo_get_stats64 = ice_get_stats64,
+ .ndo_tx_timeout = ice_tx_timeout,
+ .ndo_bpf = ice_xdp_safe_mode,
+};
+
+static const struct net_device_ops ice_netdev_ops = {
+ .ndo_open = ice_open,
+ .ndo_stop = ice_stop,
+ .ndo_start_xmit = ice_start_xmit,
+ .ndo_select_queue = ice_select_queue,
+ .ndo_features_check = ice_features_check,
+ .ndo_fix_features = ice_fix_features,
+ .ndo_set_rx_mode = ice_set_rx_mode,
+ .ndo_set_mac_address = ice_set_mac_address,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ice_change_mtu,
+ .ndo_get_stats64 = ice_get_stats64,
+ .ndo_set_tx_maxrate = ice_set_tx_maxrate,
+ .ndo_eth_ioctl = ice_eth_ioctl,
+ .ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
+ .ndo_set_vf_mac = ice_set_vf_mac,
+ .ndo_get_vf_config = ice_get_vf_cfg,
+ .ndo_set_vf_trust = ice_set_vf_trust,
+ .ndo_set_vf_vlan = ice_set_vf_port_vlan,
+ .ndo_set_vf_link_state = ice_set_vf_link_state,
+ .ndo_get_vf_stats = ice_get_vf_stats,
+ .ndo_set_vf_rate = ice_set_vf_bw,
+ .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
+ .ndo_setup_tc = ice_setup_tc,
+ .ndo_set_features = ice_set_features,
+ .ndo_bridge_getlink = ice_bridge_getlink,
+ .ndo_bridge_setlink = ice_bridge_setlink,
+ .ndo_fdb_add = ice_fdb_add,
+ .ndo_fdb_del = ice_fdb_del,
+#ifdef CONFIG_RFS_ACCEL
+ .ndo_rx_flow_steer = ice_rx_flow_steer,
+#endif
+ .ndo_tx_timeout = ice_tx_timeout,
+ .ndo_bpf = ice_xdp,
+ .ndo_xdp_xmit = ice_xdp_xmit,
+ .ndo_xsk_wakeup = ice_xsk_wakeup,
+ .ndo_get_devlink_port = ice_get_devlink_port,
+};
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
new file mode 100644
index 000000000..c262dc886
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -0,0 +1,1240 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include <linux/vmalloc.h>
+
+#include "ice_common.h"
+
+/**
+ * ice_aq_read_nvm
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be read (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @read_shadow_ram: tells if this is a shadow RAM read
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read the NVM using the admin queue commands (0x0701)
+ */
+static int
+ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length,
+ void *data, bool last_command, bool read_shadow_ram,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+ struct ice_aqc_nvm *cmd;
+
+ cmd = &desc.params.nvm;
+
+ if (offset > ICE_AQC_NVM_MAX_OFFSET)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_read);
+
+ if (!read_shadow_ram && module_typeid == ICE_AQC_NVM_START_POINT)
+ cmd->cmd_flags |= ICE_AQC_NVM_FLASH_ONLY;
+
+ /* If this is the last command in a series, set the proper flag. */
+ if (last_command)
+ cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD;
+ cmd->module_typeid = cpu_to_le16(module_typeid);
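+ /* the 24-bit offset is split into a 16-bit low word and an 8-bit
+ * high byte in the descriptor
+ */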
+ cmd->offset_low = cpu_to_le16(offset & 0xFFFF);
+ cmd->offset_high = (offset >> 16) & 0xFF;
+ cmd->length = cpu_to_le16(length);
+
+ return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/**
+ * ice_read_flat_nvm - Read portion of NVM by flat offset
+ * @hw: pointer to the HW struct
+ * @offset: offset from beginning of NVM
+ * @length: (in) number of bytes to read; (out) number of bytes actually read
+ * @data: buffer to return data in (sized to fit the specified length)
+ * @read_shadow_ram: if true, read from shadow RAM instead of NVM
+ *
+ * Reads a portion of the NVM, as a flat memory space. This function correctly
+ * breaks read requests across Shadow RAM sectors and ensures that no single
+ * read request exceeds the maximum 4KB read for a single AdminQ command.
+ *
+ * Returns a status code on failure. Note that the data pointer may be
+ * partially updated if some reads succeed before a failure.
+ */
+int
+ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data,
+ bool read_shadow_ram)
+{
+ u32 inlen = *length;
+ u32 bytes_read = 0;
+ bool last_cmd;
+ int status;
+
+ *length = 0;
+
+ /* Verify the length of the read if this is for the Shadow RAM */
+ if (read_shadow_ram && ((offset + inlen) > (hw->flash.sr_words * 2u))) {
+ ice_debug(hw, ICE_DBG_NVM, "NVM error: requested offset is beyond Shadow RAM limit\n");
+ return -EINVAL;
+ }
+
+ do {
+ u32 read_size, sector_offset;
+
+ /* ice_aq_read_nvm cannot read more than 4KB at a time.
+ * Additionally, a read from the Shadow RAM may not cross over
+ * a sector boundary. Conveniently, the sector size is also
+ * 4KB.
+ */
+ sector_offset = offset % ICE_AQ_MAX_BUF_LEN;
+ read_size = min_t(u32, ICE_AQ_MAX_BUF_LEN - sector_offset,
+ inlen - bytes_read);
+
+ last_cmd = !(bytes_read + read_size < inlen);
+
+ status = ice_aq_read_nvm(hw, ICE_AQC_NVM_START_POINT,
+ offset, read_size,
+ data + bytes_read, last_cmd,
+ read_shadow_ram, NULL);
+ if (status)
+ break;
+
+ bytes_read += read_size;
+ offset += read_size;
+ } while (!last_cmd);
+
+ *length = bytes_read;
+ return status;
+}
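+
+/* Usage sketch (illustrative only, not part of the driver): callers must
+ * treat *length as in/out, since some reads may succeed before a failure.
+ * This assumes hw is a fully initialized ice_hw and that the caller holds
+ * the NVM resource (see ice_acquire_nvm()).
+ *
+ *	u8 buf[64];
+ *	u32 len = sizeof(buf);
+ *	int err = ice_read_flat_nvm(hw, 0, &len, buf, false);
+ *
+ * On return, len holds the number of bytes actually read, even when err is
+ * non-zero.
+ */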
+
+/**
+ * ice_aq_update_nvm
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be written (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @command_flags: command parameters
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update the NVM using the admin queue commands (0x0703)
+ */
+int
+ice_aq_update_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset,
+ u16 length, void *data, bool last_command, u8 command_flags,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+ struct ice_aqc_nvm *cmd;
+
+ cmd = &desc.params.nvm;
+
+	/* The highest byte of the offset must be zero. */
+ if (offset & 0xFF000000)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_write);
+
+ cmd->cmd_flags |= command_flags;
+
+ /* If this is the last command in a series, set the proper flag. */
+ if (last_command)
+ cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD;
+ cmd->module_typeid = cpu_to_le16(module_typeid);
+ cmd->offset_low = cpu_to_le16(offset & 0xFFFF);
+ cmd->offset_high = (offset >> 16) & 0xFF;
+ cmd->length = cpu_to_le16(length);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/**
+ * ice_aq_erase_nvm
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @cd: pointer to command details structure or NULL
+ *
+ * Erase the NVM sector using the admin queue commands (0x0702)
+ */
+int ice_aq_erase_nvm(struct ice_hw *hw, u16 module_typeid, struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+ struct ice_aqc_nvm *cmd;
+
+ cmd = &desc.params.nvm;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_erase);
+
+ cmd->module_typeid = cpu_to_le16(module_typeid);
+ cmd->length = cpu_to_le16(ICE_AQC_NVM_ERASE_LEN);
+ cmd->offset_low = 0;
+ cmd->offset_high = 0;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_read_sr_word_aq - Reads Shadow RAM via AQ
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16-bit word from the Shadow RAM using ice_read_flat_nvm.
+ */
+static int ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data)
+{
+ u32 bytes = sizeof(u16);
+ __le16 data_local;
+ int status;
+
+	/* Note that ice_read_flat_nvm takes into account the 4KB AdminQ and
+ * Shadow RAM sector restrictions necessary when reading from the NVM.
+ */
+ status = ice_read_flat_nvm(hw, offset * sizeof(u16), &bytes,
+ (__force u8 *)&data_local, true);
+ if (status)
+ return status;
+
+ *data = le16_to_cpu(data_local);
+ return 0;
+}
+
+/**
+ * ice_acquire_nvm - Generic request for acquiring the NVM ownership
+ * @hw: pointer to the HW structure
+ * @access: NVM access type (read or write)
+ *
+ * This function will request NVM ownership.
+ */
+int ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access)
+{
+ if (hw->flash.blank_nvm_mode)
+ return 0;
+
+ return ice_acquire_res(hw, ICE_NVM_RES_ID, access, ICE_NVM_TIMEOUT);
+}
+
+/**
+ * ice_release_nvm - Generic request for releasing the NVM ownership
+ * @hw: pointer to the HW structure
+ *
+ * This function will release NVM ownership.
+ */
+void ice_release_nvm(struct ice_hw *hw)
+{
+ if (hw->flash.blank_nvm_mode)
+ return;
+
+ ice_release_res(hw, ICE_NVM_RES_ID);
+}
+
+/**
+ * ice_get_flash_bank_offset - Get offset into requested flash bank
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from the active or inactive flash bank
+ * @module: the module to read from
+ *
+ * Based on the module, lookup the module offset from the beginning of the
+ * flash.
+ *
+ * Returns the flash offset. Note that a value of zero is invalid and must be
+ * treated as an error.
+ */
+static u32 ice_get_flash_bank_offset(struct ice_hw *hw, enum ice_bank_select bank, u16 module)
+{
+ struct ice_bank_info *banks = &hw->flash.banks;
+ enum ice_flash_bank active_bank;
+ bool second_bank_active;
+ u32 offset, size;
+
+ switch (module) {
+ case ICE_SR_1ST_NVM_BANK_PTR:
+ offset = banks->nvm_ptr;
+ size = banks->nvm_size;
+ active_bank = banks->nvm_bank;
+ break;
+ case ICE_SR_1ST_OROM_BANK_PTR:
+ offset = banks->orom_ptr;
+ size = banks->orom_size;
+ active_bank = banks->orom_bank;
+ break;
+ case ICE_SR_NETLIST_BANK_PTR:
+ offset = banks->netlist_ptr;
+ size = banks->netlist_size;
+ active_bank = banks->netlist_bank;
+ break;
+ default:
+ ice_debug(hw, ICE_DBG_NVM, "Unexpected value for flash module: 0x%04x\n", module);
+ return 0;
+ }
+
+ switch (active_bank) {
+ case ICE_1ST_FLASH_BANK:
+ second_bank_active = false;
+ break;
+ case ICE_2ND_FLASH_BANK:
+ second_bank_active = true;
+ break;
+ default:
+ ice_debug(hw, ICE_DBG_NVM, "Unexpected value for active flash bank: %u\n",
+ active_bank);
+ return 0;
+ }
+
+ /* The second flash bank is stored immediately following the first
+ * bank. Based on whether the 1st or 2nd bank is active, and whether
+ * we want the active or inactive bank, calculate the desired offset.
+ */
+ switch (bank) {
+ case ICE_ACTIVE_FLASH_BANK:
+ return offset + (second_bank_active ? size : 0);
+ case ICE_INACTIVE_FLASH_BANK:
+ return offset + (second_bank_active ? 0 : size);
+ }
+
+ ice_debug(hw, ICE_DBG_NVM, "Unexpected value for flash bank selection: %u\n", bank);
+ return 0;
+}
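+
+/* Worked example (illustrative): if banks->nvm_ptr is 0x10000 and
+ * banks->nvm_size is 0x50000, the two NVM banks occupy [0x10000, 0x60000)
+ * and [0x60000, 0xB0000). With ICE_2ND_FLASH_BANK active, requesting
+ * ICE_ACTIVE_FLASH_BANK yields 0x10000 + 0x50000 = 0x60000, while
+ * ICE_INACTIVE_FLASH_BANK yields 0x10000.
+ */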
+
+/**
+ * ice_read_flash_module - Read data from one of the main NVM modules
+ * @hw: pointer to the HW structure
+ * @bank: which bank of the module to read
+ * @module: the module to read
+ * @offset: the offset into the module in bytes
+ * @data: storage for the data read from flash
+ * @length: bytes of data to read
+ *
+ * Read data from the specified flash module. The bank parameter indicates
+ * whether to read from the active or the inactive bank of that module.
+ *
+ * The data is read using flat NVM access, and relies on the
+ * hw->flash.banks data being set up by ice_determine_active_flash_banks()
+ * during initialization.
+ */
+static int
+ice_read_flash_module(struct ice_hw *hw, enum ice_bank_select bank, u16 module,
+ u32 offset, u8 *data, u32 length)
+{
+ int status;
+ u32 start;
+
+ start = ice_get_flash_bank_offset(hw, bank, module);
+ if (!start) {
+ ice_debug(hw, ICE_DBG_NVM, "Unable to calculate flash bank offset for module 0x%04x\n",
+ module);
+ return -EINVAL;
+ }
+
+ status = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (status)
+ return status;
+
+ status = ice_read_flat_nvm(hw, start + offset, &length, data, false);
+
+ ice_release_nvm(hw);
+
+ return status;
+}
+
+/**
+ * ice_read_nvm_module - Read from the requested main NVM module
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from active or inactive NVM module
+ * @offset: offset into the NVM module to read, in words
+ * @data: storage for returned word value
+ *
+ * Read the specified word from the requested NVM module. This includes the
+ * CSS header at the start of the NVM module.
+ */
+static int
+ice_read_nvm_module(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data)
+{
+ __le16 data_local;
+ int status;
+
+ status = ice_read_flash_module(hw, bank, ICE_SR_1ST_NVM_BANK_PTR, offset * sizeof(u16),
+ (__force u8 *)&data_local, sizeof(u16));
+ if (!status)
+ *data = le16_to_cpu(data_local);
+
+ return status;
+}
+
+/**
+ * ice_read_nvm_sr_copy - Read a word from the Shadow RAM copy in the NVM bank
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from the active or inactive NVM module
+ * @offset: offset into the Shadow RAM copy to read, in words
+ * @data: storage for returned word value
+ *
+ * Read the specified word from the copy of the Shadow RAM found in the
+ * specified NVM module.
+ */
+static int
+ice_read_nvm_sr_copy(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data)
+{
+ return ice_read_nvm_module(hw, bank, ICE_NVM_SR_COPY_WORD_OFFSET + offset, data);
+}
+
+/**
+ * ice_read_netlist_module - Read data from the netlist module area
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from the active or inactive module
+ * @offset: offset into the netlist to read from
+ * @data: storage for returned word value
+ *
+ * Read a word from the specified netlist bank.
+ */
+static int
+ice_read_netlist_module(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data)
+{
+ __le16 data_local;
+ int status;
+
+ status = ice_read_flash_module(hw, bank, ICE_SR_NETLIST_BANK_PTR, offset * sizeof(u16),
+ (__force u8 *)&data_local, sizeof(u16));
+ if (!status)
+ *data = le16_to_cpu(data_local);
+
+ return status;
+}
+
+/**
+ * ice_read_sr_word - Reads Shadow RAM word and acquire NVM if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16-bit word from the Shadow RAM using ice_read_sr_word_aq.
+ */
+int ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
+{
+ int status;
+
+ status = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (!status) {
+ status = ice_read_sr_word_aq(hw, offset, data);
+ ice_release_nvm(hw);
+ }
+
+ return status;
+}
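+
+/* Usage sketch (illustrative): read a single Shadow RAM word, e.g. the PFA
+ * pointer word. Ownership of the NVM resource is acquired and released
+ * internally.
+ *
+ *	u16 pfa_ptr;
+ *	int err = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr);
+ */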
+
+/**
+ * ice_get_pfa_module_tlv - Reads sub module TLV from NVM PFA
+ * @hw: pointer to hardware structure
+ * @module_tlv: pointer to module TLV to return
+ * @module_tlv_len: pointer to module TLV length to return
+ * @module_type: module type requested
+ *
+ * Finds the requested sub module TLV type from the Preserved Field
+ * Area (PFA) and returns the TLV pointer and length. The caller can
+ * use these to read the variable length TLV value.
+ */
+int
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+ u16 module_type)
+{
+ u16 pfa_len, pfa_ptr;
+ u16 next_tlv;
+ int status;
+
+ status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Preserved Field Array pointer.\n");
+ return status;
+ }
+ status = ice_read_sr_word(hw, pfa_ptr, &pfa_len);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n");
+ return status;
+ }
+ /* Starting with first TLV after PFA length, iterate through the list
+ * of TLVs to find the requested one.
+ */
+ next_tlv = pfa_ptr + 1;
+ while (next_tlv < pfa_ptr + pfa_len) {
+ u16 tlv_sub_module_type;
+ u16 tlv_len;
+
+ /* Read TLV type */
+ status = ice_read_sr_word(hw, next_tlv, &tlv_sub_module_type);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV type.\n");
+ break;
+ }
+ /* Read TLV length */
+ status = ice_read_sr_word(hw, next_tlv + 1, &tlv_len);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV length.\n");
+ break;
+ }
+ if (tlv_sub_module_type == module_type) {
+ if (tlv_len) {
+ *module_tlv = next_tlv;
+ *module_tlv_len = tlv_len;
+ return 0;
+ }
+ return -EINVAL;
+ }
+ /* Check next TLV, i.e. current TLV pointer + length + 2 words
+ * (for current TLV's type and length)
+ */
+ next_tlv = next_tlv + tlv_len + 2;
+ }
+ /* Module does not exist */
+ return -ENOENT;
+}
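+
+/* Usage sketch (illustrative): locate a TLV and read its first value word,
+ * which sits two words past the returned offset (after the type and length
+ * words). ICE_SR_PBA_BLOCK_PTR is used here purely as an example type.
+ *
+ *	u16 tlv, tlv_len, word;
+ *	int err = ice_get_pfa_module_tlv(hw, &tlv, &tlv_len,
+ *					 ICE_SR_PBA_BLOCK_PTR);
+ *	if (!err)
+ *		err = ice_read_sr_word(hw, tlv + 2, &word);
+ */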
+
+/**
+ * ice_read_pba_string - Reads part number string from NVM
+ * @hw: pointer to hardware structure
+ * @pba_num: stores the part number string from the NVM
+ * @pba_num_size: part number string buffer length
+ *
+ * Reads the part number string from the NVM.
+ */
+int ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size)
+{
+ u16 pba_tlv, pba_tlv_len;
+ u16 pba_word, pba_size;
+ int status;
+ u16 i;
+
+ status = ice_get_pfa_module_tlv(hw, &pba_tlv, &pba_tlv_len,
+ ICE_SR_PBA_BLOCK_PTR);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Block TLV.\n");
+ return status;
+ }
+
+ /* pba_size is the next word */
+ status = ice_read_sr_word(hw, (pba_tlv + 2), &pba_size);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Section size.\n");
+ return status;
+ }
+
+ if (pba_tlv_len < pba_size) {
+ ice_debug(hw, ICE_DBG_INIT, "Invalid PBA Block TLV size.\n");
+ return -EINVAL;
+ }
+
+ /* Subtract one to get PBA word count (PBA Size word is included in
+ * total size)
+ */
+ pba_size--;
+ if (pba_num_size < (((u32)pba_size * 2) + 1)) {
+ ice_debug(hw, ICE_DBG_INIT, "Buffer too small for PBA data.\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < pba_size; i++) {
+ status = ice_read_sr_word(hw, (pba_tlv + 2 + 1) + i, &pba_word);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Block word %d.\n", i);
+ return status;
+ }
+
+ pba_num[(i * 2)] = (pba_word >> 8) & 0xFF;
+ pba_num[(i * 2) + 1] = pba_word & 0xFF;
+ }
+ pba_num[(pba_size * 2)] = '\0';
+
+ return status;
+}
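+
+/* Sizing note (illustrative): each PBA data word expands to two ASCII
+ * characters, so pba_num_size must be at least 2 * (pba_size - 1) + 1 for
+ * the terminating NUL. For example, a PBA Section size word of 6 leaves
+ * 5 data words and so requires an 11-byte buffer.
+ */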
+
+/**
+ * ice_get_nvm_ver_info - Read NVM version information
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @nvm: pointer to NVM info structure
+ *
+ * Read the NVM EETRACK ID and map version of the main NVM image bank, filling
+ * in the NVM info structure.
+ */
+static int
+ice_get_nvm_ver_info(struct ice_hw *hw, enum ice_bank_select bank, struct ice_nvm_info *nvm)
+{
+ u16 eetrack_lo, eetrack_hi, ver;
+ int status;
+
+ status = ice_read_nvm_sr_copy(hw, bank, ICE_SR_NVM_DEV_STARTER_VER, &ver);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to read DEV starter version.\n");
+ return status;
+ }
+
+ nvm->major = (ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT;
+ nvm->minor = (ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT;
+
+ status = ice_read_nvm_sr_copy(hw, bank, ICE_SR_NVM_EETRACK_LO, &eetrack_lo);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to read EETRACK lo.\n");
+ return status;
+ }
+ status = ice_read_nvm_sr_copy(hw, bank, ICE_SR_NVM_EETRACK_HI, &eetrack_hi);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to read EETRACK hi.\n");
+ return status;
+ }
+
+ nvm->eetrack = (eetrack_hi << 16) | eetrack_lo;
+
+ return 0;
+}
+
+/**
+ * ice_get_inactive_nvm_ver - Read NVM version from the inactive bank
+ * @hw: pointer to the HW structure
+ * @nvm: storage for NVM version information
+ *
+ * Reads the NVM EETRACK ID, Map version, and security revision of the
+ * inactive NVM bank. Used to access version data for a pending update that
+ * has not yet been activated.
+ */
+int ice_get_inactive_nvm_ver(struct ice_hw *hw, struct ice_nvm_info *nvm)
+{
+ return ice_get_nvm_ver_info(hw, ICE_INACTIVE_FLASH_BANK, nvm);
+}
+
+/**
+ * ice_get_orom_civd_data - Get the combo version information from Option ROM
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash module
+ * @civd: storage for the Option ROM CIVD data.
+ *
+ * Searches through the Option ROM flash contents to locate the CIVD data for
+ * the image.
+ */
+static int
+ice_get_orom_civd_data(struct ice_hw *hw, enum ice_bank_select bank,
+ struct ice_orom_civd_info *civd)
+{
+ u8 *orom_data;
+ int status;
+ u32 offset;
+
+ /* The CIVD section is located in the Option ROM aligned to 512 bytes.
+ * The first 4 bytes must contain the ASCII characters "$CIV".
+ * A simple modulo 256 sum of all of the bytes of the structure must
+ * equal 0.
+ *
+ * The exact location is unknown and varies between images but is
+ * usually somewhere in the middle of the bank. We need to scan the
+ * Option ROM bank to locate it.
+ *
+ * It's significantly faster to read the entire Option ROM up front
+ * using the maximum page size, than to read each possible location
+ * with a separate firmware command.
+ */
+ orom_data = vzalloc(hw->flash.banks.orom_size);
+ if (!orom_data)
+ return -ENOMEM;
+
+ status = ice_read_flash_module(hw, bank, ICE_SR_1ST_OROM_BANK_PTR, 0,
+ orom_data, hw->flash.banks.orom_size);
+ if (status) {
+ vfree(orom_data);
+ ice_debug(hw, ICE_DBG_NVM, "Unable to read Option ROM data\n");
+ return status;
+ }
+
+ /* Scan the memory buffer to locate the CIVD data section */
+ for (offset = 0; (offset + 512) <= hw->flash.banks.orom_size; offset += 512) {
+ struct ice_orom_civd_info *tmp;
+ u8 sum = 0, i;
+
+ tmp = (struct ice_orom_civd_info *)&orom_data[offset];
+
+ /* Skip forward until we find a matching signature */
+ if (memcmp("$CIV", tmp->signature, sizeof(tmp->signature)) != 0)
+ continue;
+
+ ice_debug(hw, ICE_DBG_NVM, "Found CIVD section at offset %u\n",
+ offset);
+
+ /* Verify that the simple checksum is zero */
+ for (i = 0; i < sizeof(*tmp); i++)
+ /* cppcheck-suppress objectIndex */
+ sum += ((u8 *)tmp)[i];
+
+ if (sum) {
+ ice_debug(hw, ICE_DBG_NVM, "Found CIVD data with invalid checksum of %u\n",
+ sum);
+ goto err_invalid_checksum;
+ }
+
+ *civd = *tmp;
+ vfree(orom_data);
+ return 0;
+ }
+
+ ice_debug(hw, ICE_DBG_NVM, "Unable to locate CIVD data within the Option ROM\n");
+
+err_invalid_checksum:
+ vfree(orom_data);
+ return -EIO;
+}
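+
+/* Checksum note: since the modulo-256 sum of the entire structure must be
+ * zero, the checksum byte is effectively (u8)(0 - sum) taken over all of
+ * the other bytes of the CIVD structure.
+ */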
+
+/**
+ * ice_get_orom_ver_info - Read Option ROM version information
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash module
+ * @orom: pointer to Option ROM info structure
+ *
+ * Read Option ROM version and security revision from the Option ROM flash
+ * section.
+ */
+static int
+ice_get_orom_ver_info(struct ice_hw *hw, enum ice_bank_select bank, struct ice_orom_info *orom)
+{
+ struct ice_orom_civd_info civd;
+ u32 combo_ver;
+ int status;
+
+ status = ice_get_orom_civd_data(hw, bank, &civd);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to locate valid Option ROM CIVD data\n");
+ return status;
+ }
+
+ combo_ver = le32_to_cpu(civd.combo_ver);
+
+ orom->major = (u8)((combo_ver & ICE_OROM_VER_MASK) >> ICE_OROM_VER_SHIFT);
+ orom->patch = (u8)(combo_ver & ICE_OROM_VER_PATCH_MASK);
+ orom->build = (u16)((combo_ver & ICE_OROM_VER_BUILD_MASK) >> ICE_OROM_VER_BUILD_SHIFT);
+
+ return 0;
+}
+
+/**
+ * ice_get_inactive_orom_ver - Read Option ROM version from the inactive bank
+ * @hw: pointer to the HW structure
+ * @orom: storage for Option ROM version information
+ *
+ * Reads the Option ROM version and security revision data for the inactive
+ * section of flash. Used to access version data for a pending update that has
+ * not yet been activated.
+ */
+int ice_get_inactive_orom_ver(struct ice_hw *hw, struct ice_orom_info *orom)
+{
+ return ice_get_orom_ver_info(hw, ICE_INACTIVE_FLASH_BANK, orom);
+}
+
+/**
+ * ice_get_netlist_info
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @netlist: pointer to netlist version info structure
+ *
+ * Get the netlist version information from the requested bank. Reads the Link
+ * Topology section to find the Netlist ID block and extract the relevant
+ * information into the netlist version structure.
+ */
+static int
+ice_get_netlist_info(struct ice_hw *hw, enum ice_bank_select bank,
+ struct ice_netlist_info *netlist)
+{
+ u16 module_id, length, node_count, i;
+ u16 *id_blk;
+ int status;
+
+ status = ice_read_netlist_module(hw, bank, ICE_NETLIST_TYPE_OFFSET, &module_id);
+ if (status)
+ return status;
+
+ if (module_id != ICE_NETLIST_LINK_TOPO_MOD_ID) {
+ ice_debug(hw, ICE_DBG_NVM, "Expected netlist module_id ID of 0x%04x, but got 0x%04x\n",
+ ICE_NETLIST_LINK_TOPO_MOD_ID, module_id);
+ return -EIO;
+ }
+
+ status = ice_read_netlist_module(hw, bank, ICE_LINK_TOPO_MODULE_LEN, &length);
+ if (status)
+ return status;
+
+ /* sanity check that we have at least enough words to store the netlist ID block */
+ if (length < ICE_NETLIST_ID_BLK_SIZE) {
+ ice_debug(hw, ICE_DBG_NVM, "Netlist Link Topology module too small. Expected at least %u words, but got %u words.\n",
+ ICE_NETLIST_ID_BLK_SIZE, length);
+ return -EIO;
+ }
+
+ status = ice_read_netlist_module(hw, bank, ICE_LINK_TOPO_NODE_COUNT, &node_count);
+ if (status)
+ return status;
+ node_count &= ICE_LINK_TOPO_NODE_COUNT_M;
+
+ id_blk = kcalloc(ICE_NETLIST_ID_BLK_SIZE, sizeof(*id_blk), GFP_KERNEL);
+ if (!id_blk)
+ return -ENOMEM;
+
+ /* Read out the entire Netlist ID Block at once. */
+ status = ice_read_flash_module(hw, bank, ICE_SR_NETLIST_BANK_PTR,
+ ICE_NETLIST_ID_BLK_OFFSET(node_count) * sizeof(u16),
+ (u8 *)id_blk, ICE_NETLIST_ID_BLK_SIZE * sizeof(u16));
+ if (status)
+ goto exit_error;
+
+ for (i = 0; i < ICE_NETLIST_ID_BLK_SIZE; i++)
+ id_blk[i] = le16_to_cpu(((__force __le16 *)id_blk)[i]);
+
+ netlist->major = id_blk[ICE_NETLIST_ID_BLK_MAJOR_VER_HIGH] << 16 |
+ id_blk[ICE_NETLIST_ID_BLK_MAJOR_VER_LOW];
+ netlist->minor = id_blk[ICE_NETLIST_ID_BLK_MINOR_VER_HIGH] << 16 |
+ id_blk[ICE_NETLIST_ID_BLK_MINOR_VER_LOW];
+ netlist->type = id_blk[ICE_NETLIST_ID_BLK_TYPE_HIGH] << 16 |
+ id_blk[ICE_NETLIST_ID_BLK_TYPE_LOW];
+ netlist->rev = id_blk[ICE_NETLIST_ID_BLK_REV_HIGH] << 16 |
+ id_blk[ICE_NETLIST_ID_BLK_REV_LOW];
+ netlist->cust_ver = id_blk[ICE_NETLIST_ID_BLK_CUST_VER];
+ /* Read the left most 4 bytes of SHA */
+ netlist->hash = id_blk[ICE_NETLIST_ID_BLK_SHA_HASH_WORD(15)] << 16 |
+ id_blk[ICE_NETLIST_ID_BLK_SHA_HASH_WORD(14)];
+
+exit_error:
+ kfree(id_blk);
+
+ return status;
+}
+
+/**
+ * ice_get_inactive_netlist_ver
+ * @hw: pointer to the HW struct
+ * @netlist: pointer to netlist version info structure
+ *
+ * Read the netlist version data from the inactive netlist bank. Used to
+ * extract version data of a pending flash update in order to display the
+ * version data.
+ */
+int ice_get_inactive_netlist_ver(struct ice_hw *hw, struct ice_netlist_info *netlist)
+{
+ return ice_get_netlist_info(hw, ICE_INACTIVE_FLASH_BANK, netlist);
+}
+
+/**
+ * ice_discover_flash_size - Discover the available flash size.
+ * @hw: pointer to the HW struct
+ *
+ * The device flash could be up to 16MB in size. However, it is possible that
+ * the actual size is smaller. Use bisection to determine the accessible size
+ * of flash memory.
+ */
+static int ice_discover_flash_size(struct ice_hw *hw)
+{
+ u32 min_size = 0, max_size = ICE_AQC_NVM_MAX_OFFSET + 1;
+ int status;
+
+ status = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (status)
+ return status;
+
+ while ((max_size - min_size) > 1) {
+ u32 offset = (max_size + min_size) / 2;
+ u32 len = 1;
+ u8 data;
+
+ status = ice_read_flat_nvm(hw, offset, &len, &data, false);
+ if (status == -EIO &&
+ hw->adminq.sq_last_status == ICE_AQ_RC_EINVAL) {
+ ice_debug(hw, ICE_DBG_NVM, "%s: New upper bound of %u bytes\n",
+ __func__, offset);
+ status = 0;
+ max_size = offset;
+ } else if (!status) {
+ ice_debug(hw, ICE_DBG_NVM, "%s: New lower bound of %u bytes\n",
+ __func__, offset);
+ min_size = offset;
+ } else {
+ /* an unexpected error occurred */
+ goto err_read_flat_nvm;
+ }
+ }
+
+ ice_debug(hw, ICE_DBG_NVM, "Predicted flash size is %u bytes\n", max_size);
+
+ hw->flash.flash_size = max_size;
+
+err_read_flat_nvm:
+ ice_release_nvm(hw);
+
+ return status;
+}
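+
+/* Search invariant (informational): min_size tracks an offset known (or
+ * assumed) to be readable and max_size one that is not, so the bisection
+ * converges on the first inaccessible byte in about log2(16MB) = 24 reads.
+ */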
+
+/**
+ * ice_read_sr_pointer - Read the value of a Shadow RAM pointer word
+ * @hw: pointer to the HW structure
+ * @offset: the word offset of the Shadow RAM word to read
+ * @pointer: pointer value read from Shadow RAM
+ *
+ * Read the given Shadow RAM word, and convert it to a pointer value specified
+ * in bytes. This function assumes the specified offset is a valid pointer
+ * word.
+ *
+ * Each pointer word specifies whether it is stored in word size or 4KB
+ * sector size by using the highest bit. The reported pointer value will be in
+ * bytes, intended for flat NVM reads.
+ */
+static int ice_read_sr_pointer(struct ice_hw *hw, u16 offset, u32 *pointer)
+{
+ int status;
+ u16 value;
+
+ status = ice_read_sr_word(hw, offset, &value);
+ if (status)
+ return status;
+
+ /* Determine if the pointer is in 4KB or word units */
+ if (value & ICE_SR_NVM_PTR_4KB_UNITS)
+ *pointer = (value & ~ICE_SR_NVM_PTR_4KB_UNITS) * 4 * 1024;
+ else
+ *pointer = value * 2;
+
+ return 0;
+}
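+
+/* Worked example (illustrative): a pointer word of 0x8005 has the
+ * ICE_SR_NVM_PTR_4KB_UNITS bit set and decodes to 5 * 4096 = 20480 bytes,
+ * while a pointer word of 0x0005 decodes to 5 * 2 = 10 bytes.
+ */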
+
+/**
+ * ice_read_sr_area_size - Read an area size from a Shadow RAM word
+ * @hw: pointer to the HW structure
+ * @offset: the word offset of the Shadow RAM to read
+ * @size: size value read from the Shadow RAM
+ *
+ * Read the given Shadow RAM word, and convert it to an area size value
+ * specified in bytes. This function assumes the specified offset is a valid
+ * area size word.
+ *
+ * Each area size word is specified in 4KB sector units. This function reports
+ * the size in bytes, intended for flat NVM reads.
+ */
+static int ice_read_sr_area_size(struct ice_hw *hw, u16 offset, u32 *size)
+{
+ int status;
+ u16 value;
+
+ status = ice_read_sr_word(hw, offset, &value);
+ if (status)
+ return status;
+
+ /* Area sizes are always specified in 4KB units */
+ *size = value * 4 * 1024;
+
+ return 0;
+}
+
+/**
+ * ice_determine_active_flash_banks - Discover active bank for each module
+ * @hw: pointer to the HW struct
+ *
+ * Read the Shadow RAM control word and determine which banks are active for
+ * the NVM, OROM, and Netlist modules. Also read and calculate the associated
+ * pointer and size. These values are then cached into the ice_flash_info
+ * structure for later use in order to calculate the correct offset to read
+ * from the active module.
+ */
+static int ice_determine_active_flash_banks(struct ice_hw *hw)
+{
+ struct ice_bank_info *banks = &hw->flash.banks;
+ u16 ctrl_word;
+ int status;
+
+ status = ice_read_sr_word(hw, ICE_SR_NVM_CTRL_WORD, &ctrl_word);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to read the Shadow RAM control word\n");
+ return status;
+ }
+
+ /* Check that the control word indicates validity */
+ if ((ctrl_word & ICE_SR_CTRL_WORD_1_M) >> ICE_SR_CTRL_WORD_1_S != ICE_SR_CTRL_WORD_VALID) {
+ ice_debug(hw, ICE_DBG_NVM, "Shadow RAM control word is invalid\n");
+ return -EIO;
+ }
+
+ if (!(ctrl_word & ICE_SR_CTRL_WORD_NVM_BANK))
+ banks->nvm_bank = ICE_1ST_FLASH_BANK;
+ else
+ banks->nvm_bank = ICE_2ND_FLASH_BANK;
+
+ if (!(ctrl_word & ICE_SR_CTRL_WORD_OROM_BANK))
+ banks->orom_bank = ICE_1ST_FLASH_BANK;
+ else
+ banks->orom_bank = ICE_2ND_FLASH_BANK;
+
+ if (!(ctrl_word & ICE_SR_CTRL_WORD_NETLIST_BANK))
+ banks->netlist_bank = ICE_1ST_FLASH_BANK;
+ else
+ banks->netlist_bank = ICE_2ND_FLASH_BANK;
+
+ status = ice_read_sr_pointer(hw, ICE_SR_1ST_NVM_BANK_PTR, &banks->nvm_ptr);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to read NVM bank pointer\n");
+ return status;
+ }
+
+ status = ice_read_sr_area_size(hw, ICE_SR_NVM_BANK_SIZE, &banks->nvm_size);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to read NVM bank area size\n");
+ return status;
+ }
+
+ status = ice_read_sr_pointer(hw, ICE_SR_1ST_OROM_BANK_PTR, &banks->orom_ptr);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to read OROM bank pointer\n");
+ return status;
+ }
+
+ status = ice_read_sr_area_size(hw, ICE_SR_OROM_BANK_SIZE, &banks->orom_size);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to read OROM bank area size\n");
+ return status;
+ }
+
+ status = ice_read_sr_pointer(hw, ICE_SR_NETLIST_BANK_PTR, &banks->netlist_ptr);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to read Netlist bank pointer\n");
+ return status;
+ }
+
+ status = ice_read_sr_area_size(hw, ICE_SR_NETLIST_BANK_SIZE, &banks->netlist_size);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to read Netlist bank area size\n");
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_init_nvm - initialize NVM settings
+ * @hw: pointer to the HW struct
+ *
+ * This function reads and populates NVM settings such as Shadow RAM size,
+ * max_timeout, and blank_nvm_mode.
+ */
+int ice_init_nvm(struct ice_hw *hw)
+{
+ struct ice_flash_info *flash = &hw->flash;
+ u32 fla, gens_stat;
+ u8 sr_size;
+ int status;
+
+ /* The SR size is stored regardless of the NVM programming mode
+ * as the blank mode may be used in the factory line.
+ */
+ gens_stat = rd32(hw, GLNVM_GENS);
+ sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> GLNVM_GENS_SR_SIZE_S;
+
+ /* Switching to words (sr_size contains power of 2) */
+ flash->sr_words = BIT(sr_size) * ICE_SR_WORDS_IN_1KB;
+
+ /* Check if we are in the normal or blank NVM programming mode */
+ fla = rd32(hw, GLNVM_FLA);
+ if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */
+ flash->blank_nvm_mode = false;
+ } else {
+ /* Blank programming mode */
+ flash->blank_nvm_mode = true;
+ ice_debug(hw, ICE_DBG_NVM, "NVM init error: unsupported blank mode.\n");
+ return -EIO;
+ }
+
+ status = ice_discover_flash_size(hw);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "NVM init error: failed to discover flash size.\n");
+ return status;
+ }
+
+ status = ice_determine_active_flash_banks(hw);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to determine active flash banks.\n");
+ return status;
+ }
+
+ status = ice_get_nvm_ver_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->nvm);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read NVM info.\n");
+ return status;
+ }
+
+ status = ice_get_orom_ver_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->orom);
+ if (status)
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read Option ROM info.\n");
+
+ /* read the netlist version information */
+ status = ice_get_netlist_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->netlist);
+ if (status)
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read netlist info.\n");
+
+ return 0;
+}
+
+/**
+ * ice_nvm_validate_checksum
+ * @hw: pointer to the HW struct
+ *
+ * Verify NVM PFA checksum validity (0x0706)
+ */
+int ice_nvm_validate_checksum(struct ice_hw *hw)
+{
+ struct ice_aqc_nvm_checksum *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ status = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (status)
+ return status;
+
+ cmd = &desc.params.nvm_checksum;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_checksum);
+ cmd->flags = ICE_AQC_NVM_CHECKSUM_VERIFY;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ ice_release_nvm(hw);
+
+ if (!status)
+ if (le16_to_cpu(cmd->checksum) != ICE_AQC_NVM_CHECKSUM_CORRECT)
+ status = -EIO;
+
+ return status;
+}
+
+/**
+ * ice_nvm_write_activate
+ * @hw: pointer to the HW struct
+ * @cmd_flags: flags for write activate command
+ * @response_flags: response indicators from firmware
+ *
+ * Update the control word with the required banks' validity bits
+ * and dumps the Shadow RAM to flash (0x0707)
+ *
+ * cmd_flags controls which banks to activate, the preservation level to use
+ * when activating the NVM bank, and whether an EMP reset is required for
+ * activation.
+ *
+ * Note that the 16-bit cmd_flags value is split between two separate 1-byte
+ * flag values in the descriptor.
+ *
+ * On successful return of the firmware command, the response_flags variable
+ * is updated with the flags reported by firmware indicating certain status,
+ * such as whether EMP reset is enabled.
+ */
+int ice_nvm_write_activate(struct ice_hw *hw, u16 cmd_flags, u8 *response_flags)
+{
+ struct ice_aqc_nvm *cmd;
+ struct ice_aq_desc desc;
+ int err;
+
+ cmd = &desc.params.nvm;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_write_activate);
+
+ cmd->cmd_flags = (u8)(cmd_flags & 0xFF);
+ cmd->offset_high = (u8)((cmd_flags >> 8) & 0xFF);
+
+ err = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ if (!err && response_flags)
+ *response_flags = cmd->cmd_flags;
+
+ return err;
+}
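+
+/* Layout example (illustrative): cmd_flags of 0x0AB1 is sent with
+ * cmd->cmd_flags = 0xB1 and cmd->offset_high = 0x0A, which is how the
+ * 16-bit value is split across the two 1-byte descriptor fields.
+ */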
+
+/**
+ * ice_aq_nvm_update_empr
+ * @hw: pointer to the HW struct
+ *
+ * Update empr (0x0709). This command allows SW to
+ * request an EMPR to activate new FW.
+ */
+int ice_aq_nvm_update_empr(struct ice_hw *hw)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_update_empr);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_nvm_set_pkg_data
+ * @hw: pointer to the HW struct
+ * @del_pkg_data_flag: if set, the pkg_data currently stored by FW is deleted;
+ * when this flag is set, the buffer must have size 0
+ * @data: pointer to buffer
+ * @length: length of the buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set package data (0x070A). This command is equivalent to the reception
+ * of a PLDM FW Update GetPackageData cmd. This command should be sent
+ * as part of the NVM update as the first cmd in the flow.
+ */
+int
+ice_nvm_set_pkg_data(struct ice_hw *hw, bool del_pkg_data_flag, u8 *data,
+ u16 length, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_nvm_pkg_data *cmd;
+ struct ice_aq_desc desc;
+
+ if (length != 0 && !data)
+ return -EINVAL;
+
+ cmd = &desc.params.pkg_data;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_pkg_data);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ if (del_pkg_data_flag)
+ cmd->cmd_flags |= ICE_AQC_NVM_PKG_DELETE;
+
+ return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/**
+ * ice_nvm_pass_component_tbl
+ * @hw: pointer to the HW struct
+ * @data: pointer to buffer
+ * @length: length of the buffer
+ * @transfer_flag: parameter for determining stage of the update
+ * @comp_response: a pointer to the response from the 0x070B AQC
+ * @comp_response_code: a pointer to the response code from the 0x070B AQC
+ * @cd: pointer to command details structure or NULL
+ *
+ * Pass component table (0x070B). This command is equivalent to the reception
+ * of a PLDM FW Update PassComponentTable cmd. This command should be sent once
+ * per component. It can only be sent after the Set Package Data cmd and before
+ * the actual update. FW will assume these commands are going to be sent until
+ * the TransferFlag is set to End or StartAndEnd.
+ */
+int
+ice_nvm_pass_component_tbl(struct ice_hw *hw, u8 *data, u16 length,
+ u8 transfer_flag, u8 *comp_response,
+ u8 *comp_response_code, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_nvm_pass_comp_tbl *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ if (!data || !comp_response || !comp_response_code)
+ return -EINVAL;
+
+ cmd = &desc.params.pass_comp_tbl;
+
+ ice_fill_dflt_direct_cmd_desc(&desc,
+ ice_aqc_opc_nvm_pass_component_tbl);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ cmd->transfer_flag = transfer_flag;
+ status = ice_aq_send_cmd(hw, &desc, data, length, cd);
+
+ if (!status) {
+ *comp_response = cmd->component_response;
+ *comp_response_code = cmd->component_response_code;
+ }
+ return status;
+}
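+
+/* Flow sketch (informational): a typical update mirrors the PLDM sequence,
+ * calling ice_nvm_set_pkg_data() once, then ice_nvm_pass_component_tbl()
+ * per component with transfer_flag marking the first/middle/last table,
+ * before the component data itself is transferred.
+ */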
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h
new file mode 100644
index 000000000..774c23179
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_NVM_H_
+#define _ICE_NVM_H_
+
+struct ice_orom_civd_info {
+ u8 signature[4]; /* Must match ASCII '$CIV' characters */
+ u8 checksum; /* Simple modulo 256 sum of all structure bytes must equal 0 */
+ __le32 combo_ver; /* Combo Image Version number */
+ u8 combo_name_len; /* Length of the unicode combo image version string, max of 32 */
+ __le16 combo_name[32]; /* Unicode string representing the Combo Image version */
+} __packed;
+
+int ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access);
+void ice_release_nvm(struct ice_hw *hw);
+int
+ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data,
+ bool read_shadow_ram);
+int
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+ u16 module_type);
+int ice_get_inactive_orom_ver(struct ice_hw *hw, struct ice_orom_info *orom);
+int ice_get_inactive_nvm_ver(struct ice_hw *hw, struct ice_nvm_info *nvm);
+int
+ice_get_inactive_netlist_ver(struct ice_hw *hw, struct ice_netlist_info *netlist);
+int ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size);
+int ice_init_nvm(struct ice_hw *hw);
+int ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data);
+int
+ice_aq_update_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset,
+ u16 length, void *data, bool last_command, u8 command_flags,
+ struct ice_sq_cd *cd);
+int
+ice_aq_erase_nvm(struct ice_hw *hw, u16 module_typeid, struct ice_sq_cd *cd);
+int ice_nvm_validate_checksum(struct ice_hw *hw);
+int ice_nvm_write_activate(struct ice_hw *hw, u16 cmd_flags, u8 *response_flags);
+int ice_aq_nvm_update_empr(struct ice_hw *hw);
+int
+ice_nvm_set_pkg_data(struct ice_hw *hw, bool del_pkg_data_flag, u8 *data,
+ u16 length, struct ice_sq_cd *cd);
+int
+ice_nvm_pass_component_tbl(struct ice_hw *hw, u8 *data, u16 length,
+ u8 transfer_flag, u8 *comp_response,
+ u8 *comp_response_code, struct ice_sq_cd *cd);
+#endif /* _ICE_NVM_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h
new file mode 100644
index 000000000..82bc54fec
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_osdep.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_OSDEP_H_
+#define _ICE_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/bitops.h>
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/pci_ids.h>
+#ifndef CONFIG_64BIT
+#include <linux/io-64-nonatomic-lo-hi.h>
+#endif
+#include <net/udp_tunnel.h>
+
+#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg) readl((a)->hw_addr + (reg))
+#define wr64(a, reg, value) writeq((value), ((a)->hw_addr + (reg)))
+#define rd64(a, reg) readq((a)->hw_addr + (reg))
+
+#define ice_flush(a) rd32((a), GLGEN_STAT)
+#define ICE_M(m, s) ((m) << (s))
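+/* For example, ICE_M(0x7, 3) builds the mask 0x38, i.e. a 3-bit wide field
+ * whose lowest bit is bit 3. (Illustrative note.)
+ */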
+
+struct ice_dma_mem {
+ void *va;
+ dma_addr_t pa;
+ size_t size;
+};
+
+struct ice_hw;
+struct device *ice_hw_to_dev(struct ice_hw *hw);
+
+#ifdef CONFIG_DYNAMIC_DEBUG
+#define ice_debug(hw, type, fmt, args...) \
+ dev_dbg(ice_hw_to_dev(hw), fmt, ##args)
+
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+ print_hex_dump_debug(KBUILD_MODNAME " ", \
+ DUMP_PREFIX_OFFSET, rowsize, \
+ groupsize, buf, len, false)
+#else
+#define ice_debug(hw, type, fmt, args...) \
+do { \
+ if ((type) & (hw)->debug_mask) \
+ dev_info(ice_hw_to_dev(hw), fmt, ##args); \
+} while (0)
+
+#ifdef DEBUG
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+do { \
+ if ((type) & (hw)->debug_mask) \
+ print_hex_dump_debug(KBUILD_MODNAME, \
+ DUMP_PREFIX_OFFSET, \
+ rowsize, groupsize, buf, \
+ len, false); \
+} while (0)
+#else
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+do { \
+ struct ice_hw *hw_l = hw; \
+ if ((type) & (hw_l)->debug_mask) { \
+ u16 len_l = len; \
+ u8 *buf_l = buf; \
+ int i; \
+ for (i = 0; i < (len_l - 16); i += 16) \
+ ice_debug(hw_l, type, "0x%04X %16ph\n",\
+ i, ((buf_l) + i)); \
+ if (i < len_l) \
+ ice_debug(hw_l, type, "0x%04X %*ph\n", \
+ i, ((len_l) - i), ((buf_l) + i));\
+ } \
+} while (0)
+#endif /* DEBUG */
+#endif /* CONFIG_DYNAMIC_DEBUG */
+
+#endif /* _ICE_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c
new file mode 100644
index 000000000..976a03d3b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_vsi_vlan_ops.h"
+#include "ice_vsi_vlan_lib.h"
+#include "ice_vlan_mode.h"
+#include "ice.h"
+#include "ice_pf_vsi_vlan_ops.h"
+
+void ice_pf_vsi_init_vlan_ops(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+
+ if (ice_is_dvm_ena(&vsi->back->hw)) {
+ vlan_ops = &vsi->outer_vlan_ops;
+
+ vlan_ops->add_vlan = ice_vsi_add_vlan;
+ vlan_ops->del_vlan = ice_vsi_del_vlan;
+ vlan_ops->ena_stripping = ice_vsi_ena_outer_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_outer_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+ vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering;
+ vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+ } else {
+ vlan_ops = &vsi->inner_vlan_ops;
+
+ vlan_ops->add_vlan = ice_vsi_add_vlan;
+ vlan_ops->del_vlan = ice_vsi_del_vlan;
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+ vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering;
+ vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+ }
+}
+
diff --git a/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h
new file mode 100644
index 000000000..6741ec8c5
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_PF_VSI_VLAN_OPS_H_
+#define _ICE_PF_VSI_VLAN_OPS_H_
+
+#include "ice_vsi_vlan_ops.h"
+
+struct ice_vsi;
+
+void ice_pf_vsi_init_vlan_ops(struct ice_vsi *vsi);
+
+#endif /* _ICE_PF_VSI_VLAN_OPS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
new file mode 100644
index 000000000..02a4e1cf6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_PROTOCOL_TYPE_H_
+#define _ICE_PROTOCOL_TYPE_H_
+#define ICE_IPV6_ADDR_LENGTH 16
+
+/* Each recipe can match up to 5 different fields. Fields to match can be meta-
+ * data, values extracted from packet headers, or results from other recipes.
+ * One of the 5 fields is reserved for matching the switch ID. So, up to 4
+ * recipes can provide intermediate results to another one through chaining,
+ * e.g. recipes 0, 1, 2, and 3 can provide intermediate results to recipe 4.
+ */
+#define ICE_NUM_WORDS_RECIPE 4
+
+/* Max recipes that can be chained */
+#define ICE_MAX_CHAIN_RECIPE 5
+
+/* 1 word reserved for switch ID from allowed 5 words.
+ * So a recipe can have max 4 words. And you can chain 5 such recipes
+ * together. So maximum words that can be programmed for look up is 5 * 4.
+ */
+#define ICE_MAX_CHAIN_WORDS (ICE_NUM_WORDS_RECIPE * ICE_MAX_CHAIN_RECIPE)
+
+/* Field vector index corresponding to chaining */
+#define ICE_CHAIN_FV_INDEX_START 47
+
+enum ice_protocol_type {
+ ICE_MAC_OFOS = 0,
+ ICE_MAC_IL,
+ ICE_ETYPE_OL,
+ ICE_ETYPE_IL,
+ ICE_VLAN_OFOS,
+ ICE_IPV4_OFOS,
+ ICE_IPV4_IL,
+ ICE_IPV6_OFOS,
+ ICE_IPV6_IL,
+ ICE_TCP_IL,
+ ICE_UDP_OF,
+ ICE_UDP_ILOS,
+ ICE_VXLAN,
+ ICE_GENEVE,
+ ICE_NVGRE,
+ ICE_GTP,
+ ICE_GTP_NO_PAY,
+ ICE_PPPOE,
+ ICE_L2TPV3,
+ ICE_VLAN_EX,
+ ICE_VLAN_IN,
+ ICE_VXLAN_GPE,
+ ICE_SCTP_IL,
+ ICE_PROTOCOL_LAST
+};
+
+enum ice_sw_tunnel_type {
+ ICE_NON_TUN = 0,
+ ICE_SW_TUN_AND_NON_TUN,
+ ICE_SW_TUN_VXLAN,
+ ICE_SW_TUN_GENEVE,
+ ICE_SW_TUN_NVGRE,
+ ICE_SW_TUN_GTPU,
+ ICE_SW_TUN_GTPC,
+ ICE_ALL_TUNNELS /* All tunnel types including NVGRE */
+};
+
+/* Decoders for ice_prot_id:
+ * - F: First
+ * - I: Inner
+ * - L: Last
+ * - O: Outer
+ * - S: Single
+ */
+enum ice_prot_id {
+ ICE_PROT_ID_INVAL = 0,
+ ICE_PROT_MAC_OF_OR_S = 1,
+ ICE_PROT_MAC_IL = 4,
+ ICE_PROT_ETYPE_OL = 9,
+ ICE_PROT_ETYPE_IL = 10,
+ ICE_PROT_IPV4_OF_OR_S = 32,
+ ICE_PROT_IPV4_IL = 33,
+ ICE_PROT_IPV6_OF_OR_S = 40,
+ ICE_PROT_IPV6_IL = 41,
+ ICE_PROT_TCP_IL = 49,
+ ICE_PROT_UDP_OF = 52,
+ ICE_PROT_UDP_IL_OR_S = 53,
+ ICE_PROT_GRE_OF = 64,
+ ICE_PROT_ESP_F = 88,
+ ICE_PROT_ESP_2 = 89,
+ ICE_PROT_SCTP_IL = 96,
+ ICE_PROT_ICMP_IL = 98,
+ ICE_PROT_ICMPV6_IL = 100,
+ ICE_PROT_PPPOE = 103,
+ ICE_PROT_L2TPV3 = 104,
+ ICE_PROT_ARP_OF = 118,
+ ICE_PROT_META_ID = 255, /* when offset == metadata */
+ ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */
+};
+
+#define ICE_VNI_OFFSET 12 /* offset of VNI from ICE_PROT_UDP_OF */
+
+#define ICE_MAC_OFOS_HW 1
+#define ICE_MAC_IL_HW 4
+#define ICE_ETYPE_OL_HW 9
+#define ICE_ETYPE_IL_HW 10
+#define ICE_VLAN_OF_HW 16
+#define ICE_VLAN_OL_HW 17
+#define ICE_IPV4_OFOS_HW 32
+#define ICE_IPV4_IL_HW 33
+#define ICE_IPV6_OFOS_HW 40
+#define ICE_IPV6_IL_HW 41
+#define ICE_TCP_IL_HW 49
+#define ICE_UDP_ILOS_HW 53
+#define ICE_GRE_OF_HW 64
+#define ICE_PPPOE_HW 103
+#define ICE_L2TPV3_HW 104
+
+#define ICE_UDP_OF_HW 52 /* UDP Tunnels */
+#define ICE_META_DATA_ID_HW 255 /* this is used for tunnel and VLAN type */
+
+#define ICE_MDID_SIZE 2
+
+#define ICE_TUN_FLAG_MDID 21
+#define ICE_TUN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_TUN_FLAG_MDID)
+#define ICE_TUN_FLAG_MASK 0xFF
+
+#define ICE_VLAN_FLAG_MDID 20
+#define ICE_VLAN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_VLAN_FLAG_MDID)
+#define ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK 0xD000
+
+#define ICE_TUN_FLAG_FV_IND 2
+
+/* Mapping of software defined protocol ID to hardware defined protocol ID */
+struct ice_protocol_entry {
+ enum ice_protocol_type type;
+ u8 protocol_id;
+};
+
+struct ice_ether_hdr {
+ u8 dst_addr[ETH_ALEN];
+ u8 src_addr[ETH_ALEN];
+};
+
+struct ice_ethtype_hdr {
+ __be16 ethtype_id;
+};
+
+struct ice_ether_vlan_hdr {
+ u8 dst_addr[ETH_ALEN];
+ u8 src_addr[ETH_ALEN];
+ __be32 vlan_id;
+};
+
+struct ice_vlan_hdr {
+ __be16 type;
+ __be16 vlan;
+};
+
+struct ice_ipv4_hdr {
+ u8 version;
+ u8 tos;
+ __be16 total_length;
+ __be16 id;
+ __be16 frag_off;
+ u8 time_to_live;
+ u8 protocol;
+ __be16 check;
+ __be32 src_addr;
+ __be32 dst_addr;
+};
+
+struct ice_ipv6_hdr {
+ __be32 be_ver_tc_flow;
+ __be16 payload_len;
+ u8 next_hdr;
+ u8 hop_limit;
+ u8 src_addr[ICE_IPV6_ADDR_LENGTH];
+ u8 dst_addr[ICE_IPV6_ADDR_LENGTH];
+};
+
+struct ice_sctp_hdr {
+ __be16 src_port;
+ __be16 dst_port;
+ __be32 verification_tag;
+ __be32 check;
+};
+
+struct ice_l4_hdr {
+ __be16 src_port;
+ __be16 dst_port;
+ __be16 len;
+ __be16 check;
+};
+
+struct ice_udp_tnl_hdr {
+ __be16 field;
+ __be16 proto_type;
+ __be32 vni; /* only use lower 24-bits */
+};
+
+struct ice_udp_gtp_hdr {
+ u8 flags;
+ u8 msg_type;
+ __be16 rsrvd_len;
+ __be32 teid;
+ __be16 rsrvd_seq_nbr;
+ u8 rsrvd_n_pdu_nbr;
+ u8 rsrvd_next_ext;
+ u8 rsvrd_ext_len;
+ u8 pdu_type;
+ u8 qfi;
+ u8 rsvrd;
+};
+
+struct ice_pppoe_hdr {
+ u8 rsrvd_ver_type;
+ u8 rsrvd_code;
+ __be16 session_id;
+ __be16 length;
+ __be16 ppp_prot_id; /* control and data only */
+};
+
+struct ice_l2tpv3_sess_hdr {
+ __be32 session_id;
+ __be64 cookie;
+};
+
+struct ice_nvgre_hdr {
+ __be16 flags;
+ __be16 protocol;
+ __be32 tni_flow;
+};
+
+union ice_prot_hdr {
+ struct ice_ether_hdr eth_hdr;
+ struct ice_ethtype_hdr ethertype;
+ struct ice_vlan_hdr vlan_hdr;
+ struct ice_ipv4_hdr ipv4_hdr;
+ struct ice_ipv6_hdr ipv6_hdr;
+ struct ice_l4_hdr l4_hdr;
+ struct ice_sctp_hdr sctp_hdr;
+ struct ice_udp_tnl_hdr tnl_hdr;
+ struct ice_nvgre_hdr nvgre_hdr;
+ struct ice_udp_gtp_hdr gtp_hdr;
+ struct ice_pppoe_hdr pppoe_hdr;
+ struct ice_l2tpv3_sess_hdr l2tpv3_sess_hdr;
+};
+
+/* This is a mapping table entry that maps every word within a given protocol
+ * structure to the real byte offset as per the specification of that
+ * protocol header.
+ * E.g. the dst address occupies 3 words in the Ethernet header, at byte
+ * offsets 0, 2 and 4 in the actual packet header, and the src address is at
+ * byte offsets 6, 8 and 10.
+ */
+struct ice_prot_ext_tbl_entry {
+ enum ice_protocol_type prot_type;
+ /* Byte offset into header of given protocol type */
+ u8 offs[sizeof(union ice_prot_hdr)];
+};
+
+/* Extractions to be looked up for a given recipe */
+struct ice_prot_lkup_ext {
+ u16 prot_type;
+ u8 n_val_words;
+ /* create a buffer to hold max words per recipe */
+ u16 field_off[ICE_MAX_CHAIN_WORDS];
+ u16 field_mask[ICE_MAX_CHAIN_WORDS];
+
+ struct ice_fv_word fv_words[ICE_MAX_CHAIN_WORDS];
+
+ /* Indicate field offsets that have field vector indices assigned */
+ DECLARE_BITMAP(done, ICE_MAX_CHAIN_WORDS);
+};
+
+struct ice_pref_recipe_group {
+ u8 n_val_pairs; /* Number of valid pairs */
+ struct ice_fv_word pairs[ICE_NUM_WORDS_RECIPE];
+ u16 mask[ICE_NUM_WORDS_RECIPE];
+};
+
+struct ice_recp_grp_entry {
+ struct list_head l_entry;
+
+#define ICE_INVAL_CHAIN_IND 0xFF
+ u16 rid;
+ u8 chain_idx;
+ u16 fv_idx[ICE_NUM_WORDS_RECIPE];
+ u16 fv_mask[ICE_NUM_WORDS_RECIPE];
+ struct ice_pref_recipe_group r_group;
+};
+#endif /* _ICE_PROTOCOL_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
new file mode 100644
index 000000000..46b0063a5
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -0,0 +1,2734 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_trace.h"
+
+#define E810_OUT_PROP_DELAY_NS 1
+
+#define UNKNOWN_INCVAL_E822 0x100000000ULL
+
+static const struct ptp_pin_desc ice_pin_desc_e810t[] = {
+ /* name idx func chan */
+ { "GNSS", GNSS, PTP_PF_EXTTS, 0, { 0, } },
+ { "SMA1", SMA1, PTP_PF_NONE, 1, { 0, } },
+ { "U.FL1", UFL1, PTP_PF_NONE, 1, { 0, } },
+ { "SMA2", SMA2, PTP_PF_NONE, 2, { 0, } },
+ { "U.FL2", UFL2, PTP_PF_NONE, 2, { 0, } },
+};
+
+/**
+ * ice_get_sma_config_e810t
+ * @hw: pointer to the hw struct
+ * @ptp_pins: pointer to the ptp_pin_desc structure
+ *
+ * Read the configuration of the SMA control logic and put it into the
+ * ptp_pin_desc structure
+ */
+static int
+ice_get_sma_config_e810t(struct ice_hw *hw, struct ptp_pin_desc *ptp_pins)
+{
+ u8 data, i;
+ int status;
+
+ /* Read initial pin state */
+ status = ice_read_sma_ctrl_e810t(hw, &data);
+ if (status)
+ return status;
+
+ /* initialize with defaults */
+ for (i = 0; i < NUM_PTP_PINS_E810T; i++) {
+ snprintf(ptp_pins[i].name, sizeof(ptp_pins[i].name),
+ "%s", ice_pin_desc_e810t[i].name);
+ ptp_pins[i].index = ice_pin_desc_e810t[i].index;
+ ptp_pins[i].func = ice_pin_desc_e810t[i].func;
+ ptp_pins[i].chan = ice_pin_desc_e810t[i].chan;
+ }
+
+ /* Parse SMA1/UFL1 */
+ switch (data & ICE_SMA1_MASK_E810T) {
+ case ICE_SMA1_MASK_E810T:
+ default:
+ ptp_pins[SMA1].func = PTP_PF_NONE;
+ ptp_pins[UFL1].func = PTP_PF_NONE;
+ break;
+ case ICE_SMA1_DIR_EN_E810T:
+ ptp_pins[SMA1].func = PTP_PF_PEROUT;
+ ptp_pins[UFL1].func = PTP_PF_NONE;
+ break;
+ case ICE_SMA1_TX_EN_E810T:
+ ptp_pins[SMA1].func = PTP_PF_EXTTS;
+ ptp_pins[UFL1].func = PTP_PF_NONE;
+ break;
+ case 0:
+ ptp_pins[SMA1].func = PTP_PF_EXTTS;
+ ptp_pins[UFL1].func = PTP_PF_PEROUT;
+ break;
+ }
+
+ /* Parse SMA2/UFL2 */
+ switch (data & ICE_SMA2_MASK_E810T) {
+ case ICE_SMA2_MASK_E810T:
+ default:
+ ptp_pins[SMA2].func = PTP_PF_NONE;
+ ptp_pins[UFL2].func = PTP_PF_NONE;
+ break;
+ case (ICE_SMA2_TX_EN_E810T | ICE_SMA2_UFL2_RX_DIS_E810T):
+ ptp_pins[SMA2].func = PTP_PF_EXTTS;
+ ptp_pins[UFL2].func = PTP_PF_NONE;
+ break;
+ case (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_UFL2_RX_DIS_E810T):
+ ptp_pins[SMA2].func = PTP_PF_PEROUT;
+ ptp_pins[UFL2].func = PTP_PF_NONE;
+ break;
+ case (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_TX_EN_E810T):
+ ptp_pins[SMA2].func = PTP_PF_NONE;
+ ptp_pins[UFL2].func = PTP_PF_EXTTS;
+ break;
+ case ICE_SMA2_DIR_EN_E810T:
+ ptp_pins[SMA2].func = PTP_PF_PEROUT;
+ ptp_pins[UFL2].func = PTP_PF_EXTTS;
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_set_sma_config_e810t
+ * @hw: pointer to the hw struct
+ * @ptp_pins: pointer to the ptp_pin_desc structure
+ *
+ * Set the configuration of the SMA control logic based on the configuration
+ * passed in the ptp_pins parameter
+ */
+static int
+ice_ptp_set_sma_config_e810t(struct ice_hw *hw,
+ const struct ptp_pin_desc *ptp_pins)
+{
+ int status;
+ u8 data;
+
+ /* SMA1 and UFL1 cannot be set to TX at the same time */
+ if (ptp_pins[SMA1].func == PTP_PF_PEROUT &&
+ ptp_pins[UFL1].func == PTP_PF_PEROUT)
+ return -EINVAL;
+
+ /* SMA2 and UFL2 cannot be set to RX at the same time */
+ if (ptp_pins[SMA2].func == PTP_PF_EXTTS &&
+ ptp_pins[UFL2].func == PTP_PF_EXTTS)
+ return -EINVAL;
+
+ /* Read initial pin state value */
+ status = ice_read_sma_ctrl_e810t(hw, &data);
+ if (status)
+ return status;
+
+	/* Set the right state based on the desired configuration */
+ data &= ~ICE_SMA1_MASK_E810T;
+ if (ptp_pins[SMA1].func == PTP_PF_NONE &&
+ ptp_pins[UFL1].func == PTP_PF_NONE) {
+ dev_info(ice_hw_to_dev(hw), "SMA1 + U.FL1 disabled");
+ data |= ICE_SMA1_MASK_E810T;
+ } else if (ptp_pins[SMA1].func == PTP_PF_EXTTS &&
+ ptp_pins[UFL1].func == PTP_PF_NONE) {
+ dev_info(ice_hw_to_dev(hw), "SMA1 RX");
+ data |= ICE_SMA1_TX_EN_E810T;
+ } else if (ptp_pins[SMA1].func == PTP_PF_NONE &&
+ ptp_pins[UFL1].func == PTP_PF_PEROUT) {
+ /* U.FL 1 TX will always enable SMA 1 RX */
+ dev_info(ice_hw_to_dev(hw), "SMA1 RX + U.FL1 TX");
+ } else if (ptp_pins[SMA1].func == PTP_PF_EXTTS &&
+ ptp_pins[UFL1].func == PTP_PF_PEROUT) {
+ dev_info(ice_hw_to_dev(hw), "SMA1 RX + U.FL1 TX");
+ } else if (ptp_pins[SMA1].func == PTP_PF_PEROUT &&
+ ptp_pins[UFL1].func == PTP_PF_NONE) {
+ dev_info(ice_hw_to_dev(hw), "SMA1 TX");
+ data |= ICE_SMA1_DIR_EN_E810T;
+ }
+
+ data &= ~ICE_SMA2_MASK_E810T;
+ if (ptp_pins[SMA2].func == PTP_PF_NONE &&
+ ptp_pins[UFL2].func == PTP_PF_NONE) {
+ dev_info(ice_hw_to_dev(hw), "SMA2 + U.FL2 disabled");
+ data |= ICE_SMA2_MASK_E810T;
+ } else if (ptp_pins[SMA2].func == PTP_PF_EXTTS &&
+ ptp_pins[UFL2].func == PTP_PF_NONE) {
+ dev_info(ice_hw_to_dev(hw), "SMA2 RX");
+ data |= (ICE_SMA2_TX_EN_E810T |
+ ICE_SMA2_UFL2_RX_DIS_E810T);
+ } else if (ptp_pins[SMA2].func == PTP_PF_NONE &&
+ ptp_pins[UFL2].func == PTP_PF_EXTTS) {
+ dev_info(ice_hw_to_dev(hw), "UFL2 RX");
+ data |= (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_TX_EN_E810T);
+ } else if (ptp_pins[SMA2].func == PTP_PF_PEROUT &&
+ ptp_pins[UFL2].func == PTP_PF_NONE) {
+ dev_info(ice_hw_to_dev(hw), "SMA2 TX");
+ data |= (ICE_SMA2_DIR_EN_E810T |
+ ICE_SMA2_UFL2_RX_DIS_E810T);
+ } else if (ptp_pins[SMA2].func == PTP_PF_PEROUT &&
+ ptp_pins[UFL2].func == PTP_PF_EXTTS) {
+ dev_info(ice_hw_to_dev(hw), "SMA2 TX + U.FL2 RX");
+ data |= ICE_SMA2_DIR_EN_E810T;
+ }
+
+ return ice_write_sma_ctrl_e810t(hw, data);
+}
+
+/**
+ * ice_ptp_set_sma_e810t
+ * @info: the driver's PTP info structure
+ * @pin: pin index in kernel structure
+ * @func: Pin function to be set (PTP_PF_NONE, PTP_PF_EXTTS or PTP_PF_PEROUT)
+ *
+ * Set the configuration of a single SMA pin
+ */
+static int
+ice_ptp_set_sma_e810t(struct ptp_clock_info *info, unsigned int pin,
+ enum ptp_pin_function func)
+{
+ struct ptp_pin_desc ptp_pins[NUM_PTP_PINS_E810T];
+ struct ice_pf *pf = ptp_info_to_pf(info);
+ struct ice_hw *hw = &pf->hw;
+ int err;
+
+ if (pin < SMA1 || func > PTP_PF_PEROUT)
+ return -EOPNOTSUPP;
+
+ err = ice_get_sma_config_e810t(hw, ptp_pins);
+ if (err)
+ return err;
+
+ /* Disable the same function on the other pin sharing the channel */
+ if (pin == SMA1 && ptp_pins[UFL1].func == func)
+ ptp_pins[UFL1].func = PTP_PF_NONE;
+ if (pin == UFL1 && ptp_pins[SMA1].func == func)
+ ptp_pins[SMA1].func = PTP_PF_NONE;
+
+ if (pin == SMA2 && ptp_pins[UFL2].func == func)
+ ptp_pins[UFL2].func = PTP_PF_NONE;
+ if (pin == UFL2 && ptp_pins[SMA2].func == func)
+ ptp_pins[SMA2].func = PTP_PF_NONE;
+
+ /* Set up new pin function in the temp table */
+ ptp_pins[pin].func = func;
+
+ return ice_ptp_set_sma_config_e810t(hw, ptp_pins);
+}
+
+/**
+ * ice_verify_pin_e810t
+ * @info: the driver's PTP info structure
+ * @pin: Pin index
+ * @func: Assigned function
+ * @chan: Assigned channel
+ *
+ * Verify if the pin supports the requested pin function and check pin
+ * consistency, then reconfigure the SMA logic attached to the given pin to
+ * enable its desired functionality
+ */
+static int
+ice_verify_pin_e810t(struct ptp_clock_info *info, unsigned int pin,
+ enum ptp_pin_function func, unsigned int chan)
+{
+ /* Don't allow channel reassignment */
+ if (chan != ice_pin_desc_e810t[pin].chan)
+ return -EOPNOTSUPP;
+
+ /* Check if functions are properly assigned */
+ switch (func) {
+ case PTP_PF_NONE:
+ break;
+ case PTP_PF_EXTTS:
+ if (pin == UFL1)
+ return -EOPNOTSUPP;
+ break;
+ case PTP_PF_PEROUT:
+ if (pin == UFL2 || pin == GNSS)
+ return -EOPNOTSUPP;
+ break;
+ case PTP_PF_PHYSYNC:
+ return -EOPNOTSUPP;
+ }
+
+ return ice_ptp_set_sma_e810t(info, pin, func);
+}
+
+/**
+ * ice_set_tx_tstamp - Enable or disable Tx timestamping
+ * @pf: The PF pointer to search in
+ * @on: bool value for whether timestamps are enabled or disabled
+ */
+static void ice_set_tx_tstamp(struct ice_pf *pf, bool on)
+{
+ struct ice_vsi *vsi;
+ u32 val;
+ u16 i;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return;
+
+ /* Set the timestamp enable flag for all the Tx rings */
+ ice_for_each_txq(vsi, i) {
+ if (!vsi->tx_rings[i])
+ continue;
+ vsi->tx_rings[i]->ptp_tx = on;
+ }
+
+ /* Configure the Tx timestamp interrupt */
+ val = rd32(&pf->hw, PFINT_OICR_ENA);
+ if (on)
+ val |= PFINT_OICR_TSYN_TX_M;
+ else
+ val &= ~PFINT_OICR_TSYN_TX_M;
+ wr32(&pf->hw, PFINT_OICR_ENA, val);
+
+ pf->ptp.tstamp_config.tx_type = on ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+}
+
+/**
+ * ice_set_rx_tstamp - Enable or disable Rx timestamping
+ * @pf: The PF pointer to search in
+ * @on: bool value for whether timestamps are enabled or disabled
+ */
+static void ice_set_rx_tstamp(struct ice_pf *pf, bool on)
+{
+ struct ice_vsi *vsi;
+ u16 i;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return;
+
+ /* Set the timestamp flag for all the Rx rings */
+ ice_for_each_rxq(vsi, i) {
+ if (!vsi->rx_rings[i])
+ continue;
+ vsi->rx_rings[i]->ptp_rx = on;
+ }
+
+ pf->ptp.tstamp_config.rx_filter = on ? HWTSTAMP_FILTER_ALL :
+ HWTSTAMP_FILTER_NONE;
+}
+
+/**
+ * ice_ptp_cfg_timestamp - Configure timestamp for init/deinit
+ * @pf: Board private structure
+ * @ena: bool value to enable or disable time stamp
+ *
+ * This function will configure timestamping during PTP initialization
+ * and deinitialization
+ */
+void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena)
+{
+ ice_set_tx_tstamp(pf, ena);
+ ice_set_rx_tstamp(pf, ena);
+}
+
+/**
+ * ice_get_ptp_clock_index - Get the PTP clock index
+ * @pf: the PF pointer
+ *
+ * Determine the clock index of the PTP clock associated with this device. If
+ * this is the PF controlling the clock, just use the local access to the
+ * clock device pointer.
+ *
+ * Otherwise, read from the driver shared parameters to determine the clock
+ * index value.
+ *
+ * Returns: the index of the PTP clock associated with this device, or -1 if
+ * there is no associated clock.
+ */
+int ice_get_ptp_clock_index(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ enum ice_aqc_driver_params param_idx;
+ struct ice_hw *hw = &pf->hw;
+ u8 tmr_idx;
+ u32 value;
+ int err;
+
+ /* Use the ptp_clock structure if we're the main PF */
+ if (pf->ptp.clock)
+ return ptp_clock_index(pf->ptp.clock);
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+ if (!tmr_idx)
+ param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0;
+ else
+ param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1;
+
+ err = ice_aq_get_driver_param(hw, param_idx, &value, NULL);
+ if (err) {
+ dev_err(dev, "Failed to read PTP clock index parameter, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ return -1;
+ }
+
+ /* The PTP clock index is an integer, and will be between 0 and
+ * INT_MAX. The highest bit of the driver shared parameter is used to
+ * indicate whether or not the currently stored clock index is valid.
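+	 * For example, a stored value of 0x80000003 has the valid bit set
+	 * and encodes clock index 3.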
+ */
+ if (!(value & PTP_SHARED_CLK_IDX_VALID))
+ return -1;
+
+ return value & ~PTP_SHARED_CLK_IDX_VALID;
+}
+
+/**
+ * ice_set_ptp_clock_index - Set the PTP clock index
+ * @pf: the PF pointer
+ *
+ * Set the PTP clock index for this device into the shared driver parameters,
+ * so that other PFs associated with this device can read it.
+ *
+ * If the PF is unable to store the clock index, it will log an error, but
+ * will continue operating PTP.
+ */
+static void ice_set_ptp_clock_index(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ enum ice_aqc_driver_params param_idx;
+ struct ice_hw *hw = &pf->hw;
+ u8 tmr_idx;
+ u32 value;
+ int err;
+
+ if (!pf->ptp.clock)
+ return;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+ if (!tmr_idx)
+ param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0;
+ else
+ param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1;
+
+ value = (u32)ptp_clock_index(pf->ptp.clock);
+ if (value > INT_MAX) {
+ dev_err(dev, "PTP Clock index is too large to store\n");
+ return;
+ }
+ value |= PTP_SHARED_CLK_IDX_VALID;
+
+ err = ice_aq_set_driver_param(hw, param_idx, value, NULL);
+ if (err) {
+ dev_err(dev, "Failed to set PTP clock index parameter, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ }
+}
+
+/**
+ * ice_clear_ptp_clock_index - Clear the PTP clock index
+ * @pf: the PF pointer
+ *
+ * Clear the PTP clock index for this device. Must be called when
+ * unregistering the PTP clock, in order to ensure other PFs stop reporting
+ * a clock object that no longer exists.
+ */
+static void ice_clear_ptp_clock_index(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ enum ice_aqc_driver_params param_idx;
+ struct ice_hw *hw = &pf->hw;
+ u8 tmr_idx;
+ int err;
+
+ /* Do not clear the index if we don't own the timer */
+ if (!hw->func_caps.ts_func_info.src_tmr_owned)
+ return;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+ if (!tmr_idx)
+ param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0;
+ else
+ param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1;
+
+ err = ice_aq_set_driver_param(hw, param_idx, 0, NULL);
+ if (err) {
+ dev_dbg(dev, "Failed to clear PTP clock index parameter, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ }
+}
+
+/**
+ * ice_ptp_read_src_clk_reg - Read the source clock register
+ * @pf: Board private structure
+ * @sts: Optional parameter for holding a pair of system timestamps from
+ * the system clock. Will be ignored if NULL is given.
+ */
+static u64
+ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
+{
+ struct ice_hw *hw = &pf->hw;
+ u32 hi, lo, lo2;
+ u8 tmr_idx;
+
+ tmr_idx = ice_get_ptp_src_clock_index(hw);
+ /* Read the system timestamp pre PHC read */
+ ptp_read_system_prets(sts);
+
+ lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
+
+ /* Read the system timestamp post PHC read */
+ ptp_read_system_postts(sts);
+
+ hi = rd32(hw, GLTSYN_TIME_H(tmr_idx));
+ lo2 = rd32(hw, GLTSYN_TIME_L(tmr_idx));
+
+ if (lo2 < lo) {
+ /* if TIME_L rolled over read TIME_L again and update
+ * system timestamps
+ */
+ ptp_read_system_prets(sts);
+ lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
+ ptp_read_system_postts(sts);
+ hi = rd32(hw, GLTSYN_TIME_H(tmr_idx));
+ }
+
+ return ((u64)hi << 32) | lo;
+}
+
+/**
+ * ice_ptp_extend_32b_ts - Convert a 32b nanoseconds timestamp to 64b
+ * @cached_phc_time: recently cached copy of PHC time
+ * @in_tstamp: Ingress/egress 32b nanoseconds timestamp value
+ *
+ * Hardware captures timestamps which contain only 32 bits of nominal
+ * nanoseconds, as opposed to the 64bit timestamps that the stack expects.
+ * Note that the captured timestamp values may be 40 bits, but the lower
+ * 8 bits hold the sub-nanosecond portion and a valid bit, and are generally
+ * discarded.
+ *
+ * Extend the 32bit nanosecond timestamp using the following algorithm and
+ * assumptions:
+ *
+ * 1) have a recently cached copy of the PHC time
+ * 2) assume that the in_tstamp was captured within 2^31 nanoseconds (~2.1
+ *    seconds) before or after the PHC time was captured
+ * 3) calculate the delta between the cached time and the timestamp
+ * 4) if the delta is smaller than 2^31 nanoseconds, then the timestamp was
+ * captured after the PHC time. In this case, the full timestamp is just
+ * the cached PHC time plus the delta.
+ * 5) otherwise, if the delta is larger than 2^31 nanoseconds, then the
+ * timestamp was captured *before* the PHC time, i.e. because the PHC
+ * cache was updated after the timestamp was captured by hardware. In this
+ * case, the full timestamp is the cached time minus the inverse delta.
+ *
+ * This algorithm works even if the PHC time was updated after a Tx timestamp
+ * was requested, but before the Tx timestamp event was reported from
+ * hardware.
+ *
+ * This calculation primarily relies on keeping the cached PHC time up to
+ * date. If the timestamp was captured more than 2^31 nanoseconds after the
+ * PHC time, it is possible that the lower 32bits of PHC time have
+ * overflowed more than once, and we might generate an incorrect timestamp.
+ *
+ * This is prevented by (a) periodically updating the cached PHC time twice
+ * a second, and (b) discarding any Tx timestamp packet if it has waited for
+ * a timestamp for more than two seconds.
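+ *
+ * As a worked example (values purely illustrative): if cached_phc_time is
+ * 0x100000100, its lower 32 bits are 0x100. An in_tstamp of 0x180 yields
+ * delta = 0x80, below 2^31, so the extended time is 0x100000100 + 0x80 =
+ * 0x100000180. An in_tstamp of 0x80 yields delta = 0xFFFFFF80, above 2^31,
+ * so the timestamp is treated as older than the cached time and extends to
+ * 0x100000100 - 0x80 = 0x100000080.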
+ */
+static u64 ice_ptp_extend_32b_ts(u64 cached_phc_time, u32 in_tstamp)
+{
+ u32 delta, phc_time_lo;
+ u64 ns;
+
+ /* Extract the lower 32 bits of the PHC time */
+ phc_time_lo = (u32)cached_phc_time;
+
+ /* Calculate the delta between the lower 32bits of the cached PHC
+ * time and the in_tstamp value
+ */
+ delta = (in_tstamp - phc_time_lo);
+
+ /* Do not assume that the in_tstamp is always more recent than the
+ * cached PHC time. If the delta is large, it indicates that the
+ * in_tstamp was taken in the past, and should be converted
+ * forward.
+ */
+ if (delta > (U32_MAX / 2)) {
+ /* reverse the delta calculation here */
+ delta = (phc_time_lo - in_tstamp);
+ ns = cached_phc_time - delta;
+ } else {
+ ns = cached_phc_time + delta;
+ }
+
+ return ns;
+}
+
+/**
+ * ice_ptp_extend_40b_ts - Convert a 40b timestamp to 64b nanoseconds
+ * @pf: Board private structure
+ * @in_tstamp: Ingress/egress 40b timestamp value
+ *
+ * The Tx and Rx timestamps are 40 bits wide, including 32 bits of nominal
+ * nanoseconds, 7 bits of sub-nanoseconds, and a valid bit.
+ *
+ * *--------------------------------------------------------------*
+ * | 32 bits of nanoseconds | 7 high bits of sub ns underflow | v |
+ * *--------------------------------------------------------------*
+ *
+ * The low bit is an indicator of whether the timestamp is valid. The next
+ * 7 bits are a capture of the upper 7 bits of the sub-nanosecond underflow,
+ * and the remaining 32 bits are the lower 32 bits of the PHC timer.
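+ *
+ * For example, a raw 40-bit value of 0x1234567801 has the valid bit set
+ * (low byte 0x01) and yields the 32-bit nominal nanosecond value 0x12345678
+ * after the shift and mask below.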
+ *
+ * It is assumed that the caller verifies the timestamp is valid prior to
+ * calling this function.
+ *
+ * Extract the 32bit nominal nanoseconds and extend them. Use the cached PHC
+ * time stored in the device private PTP structure as the basis for timestamp
+ * extension.
+ *
+ * See ice_ptp_extend_32b_ts for a detailed explanation of the extension
+ * algorithm.
+ */
+static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp)
+{
+ const u64 mask = GENMASK_ULL(31, 0);
+ unsigned long discard_time;
+
+ /* Discard the hardware timestamp if the cached PHC time is too old */
+ discard_time = pf->ptp.cached_phc_jiffies + msecs_to_jiffies(2000);
+ if (time_is_before_jiffies(discard_time)) {
+ pf->ptp.tx_hwtstamp_discarded++;
+ return 0;
+ }
+
+ return ice_ptp_extend_32b_ts(pf->ptp.cached_phc_time,
+ (in_tstamp >> 8) & mask);
+}
+
+/**
+ * ice_ptp_tx_tstamp - Process Tx timestamps for a port
+ * @tx: the PTP Tx timestamp tracker
+ *
+ * Process timestamps captured by the PHY associated with this port. To do
+ * this, loop over each index with a waiting skb.
+ *
+ * If a given index has a valid timestamp, perform the following steps:
+ *
+ * 1) copy the timestamp out of the PHY register
+ * 2) clear the timestamp valid bit in the PHY register
+ * 3) unlock the index by clearing the associated in_use bit
+ * 4) extend the 40b timestamp value to get a 64bit timestamp
+ * 5) send that timestamp to the stack
+ *
+ * Returns true if all timestamps were handled, and false if any slots remain
+ * without a timestamp.
+ *
+ * After looping, if we still have waiting SKBs, return false. This may cause
+ * us to effectively poll even when not strictly necessary. We do this because
+ * it's possible a new timestamp was requested around the same time as the
+ * interrupt. In some cases hardware might not interrupt us again when the
+ * timestamp is captured.
+ *
+ * Note that we only take the tracking lock when clearing the bit and when
+ * checking if we need to re-queue this task. The only place where bits can be
+ * set is the hard xmit routine where an SKB has a request flag set. The only
+ * places where we clear bits are this work function, or the periodic cleanup
+ * thread. If the cleanup thread clears a bit we're processing we catch it
+ * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread
+ * starts a new timestamp, we might not begin processing it right away but we
+ * will notice it at the end when we re-queue the task. If a Tx thread starts
+ * a new timestamp just after this function exits without re-queuing,
+ * the interrupt that fires when the timestamp finishes should trigger
+ * processing. Avoiding holding
+ * the lock for the entire function is important in order to ensure that Tx
+ * threads do not get blocked while waiting for the lock.
+ */
+static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx)
+{
+ struct ice_ptp_port *ptp_port;
+ bool more_timestamps;
+ struct ice_pf *pf;
+ u8 idx;
+
+ if (!tx->init)
+ return true;
+
+ ptp_port = container_of(tx, struct ice_ptp_port, tx);
+ pf = ptp_port_to_pf(ptp_port);
+
+ for_each_set_bit(idx, tx->in_use, tx->len) {
+ struct skb_shared_hwtstamps shhwtstamps = {};
+ u8 phy_idx = idx + tx->quad_offset;
+ u64 raw_tstamp, tstamp;
+ struct sk_buff *skb;
+ int err;
+
+ ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx);
+
+ err = ice_read_phy_tstamp(&pf->hw, tx->quad, phy_idx,
+ &raw_tstamp);
+ if (err)
+ continue;
+
+ ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx);
+
+ /* Check if the timestamp is invalid or stale */
+ if (!(raw_tstamp & ICE_PTP_TS_VALID) ||
+ raw_tstamp == tx->tstamps[idx].cached_tstamp)
+ continue;
+
+ /* The timestamp is valid, so we'll go ahead and clear this
+ * index and then send the timestamp up to the stack.
+ */
+ spin_lock(&tx->lock);
+ tx->tstamps[idx].cached_tstamp = raw_tstamp;
+ clear_bit(idx, tx->in_use);
+ skb = tx->tstamps[idx].skb;
+ tx->tstamps[idx].skb = NULL;
+ spin_unlock(&tx->lock);
+
+ /* it's (unlikely but) possible we raced with the cleanup
		 * thread that discards old timestamp requests.
+ */
+ if (!skb)
+ continue;
+
+ /* Extend the timestamp using cached PHC time */
+ tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp);
+ if (tstamp) {
+ shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
+ ice_trace(tx_tstamp_complete, skb, idx);
+ }
+
+ skb_tstamp_tx(skb, &shhwtstamps);
+ dev_kfree_skb_any(skb);
+ }
+
+ /* Check if we still have work to do. If so, re-queue this task to
+ * poll for remaining timestamps.
+ */
+ spin_lock(&tx->lock);
+ more_timestamps = tx->init && !bitmap_empty(tx->in_use, tx->len);
+ spin_unlock(&tx->lock);
+
+ return !more_timestamps;
+}
+
+/**
+ * ice_ptp_alloc_tx_tracker - Initialize tracking for Tx timestamps
+ * @tx: Tx tracking structure to initialize
+ *
+ * Assumes that the length has already been initialized. Do not call directly;
+ * use ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead.
+ */
+static int
+ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx)
+{
+ tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL);
+ if (!tx->tstamps)
+ return -ENOMEM;
+
+ tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL);
+ if (!tx->in_use) {
+ kfree(tx->tstamps);
+ tx->tstamps = NULL;
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&tx->lock);
+
+ tx->init = 1;
+
+ return 0;
+}
+
+/**
+ * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker
+ * @pf: Board private structure
+ * @tx: the tracker to flush
+ */
+static void
+ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+ u8 idx;
+
+ for (idx = 0; idx < tx->len; idx++) {
+ u8 phy_idx = idx + tx->quad_offset;
+
+ spin_lock(&tx->lock);
+ if (tx->tstamps[idx].skb) {
+ dev_kfree_skb_any(tx->tstamps[idx].skb);
+ tx->tstamps[idx].skb = NULL;
+ pf->ptp.tx_hwtstamp_flushed++;
+ }
+ clear_bit(idx, tx->in_use);
+ spin_unlock(&tx->lock);
+
+ /* Clear any potential residual timestamp in the PHY block */
+ if (!pf->hw.reset_ongoing)
+ ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx);
+ }
+}
+
+/**
+ * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker
+ * @pf: Board private structure
+ * @tx: Tx tracking structure to release
+ *
+ * Free memory associated with the Tx timestamp tracker.
+ */
+static void
+ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+ tx->init = 0;
+
+ /* wait for potentially outstanding interrupt to complete */
+ synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
+
+ ice_ptp_flush_tx_tracker(pf, tx);
+
+ kfree(tx->tstamps);
+ tx->tstamps = NULL;
+
+ bitmap_free(tx->in_use);
+ tx->in_use = NULL;
+
+ tx->len = 0;
+}
+
+/**
+ * ice_ptp_init_tx_e822 - Initialize tracking for Tx timestamps
+ * @pf: Board private structure
+ * @tx: the Tx tracking structure to initialize
+ * @port: the port this structure tracks
+ *
+ * Initialize the Tx timestamp tracker for this port. For generic MAC devices,
+ * the timestamp block is shared for all ports in the same quad. To avoid
+ * ports using the same timestamp index, logically break the block of
+ * registers into chunks based on the port number.
+ */
+static int
+ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
+{
+ tx->quad = port / ICE_PORTS_PER_QUAD;
+ tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT;
+ tx->len = INDEX_PER_PORT;
+
+ return ice_ptp_alloc_tx_tracker(tx);
+}
+
+/**
+ * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps
+ * @pf: Board private structure
+ * @tx: the Tx tracking structure to initialize
+ *
+ * Initialize the Tx timestamp tracker for this PF. For E810 devices, each
+ * port has its own block of timestamps, independent of the other ports.
+ */
+static int
+ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
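+	/* On E810, each port has its own timestamp block, so the "quad"
+	 * field holds the logical port number rather than a quad index.
+	 */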
+ tx->quad = pf->hw.port_info->lport;
+ tx->quad_offset = 0;
+ tx->len = INDEX_PER_QUAD;
+
+ return ice_ptp_alloc_tx_tracker(tx);
+}
+
+/**
+ * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped
+ * @pf: pointer to the PF struct
+ * @tx: PTP Tx tracker to clean up
+ *
+ * Loop through the Tx timestamp requests and see if any of them have been
+ * waiting for a long time. Discard any SKBs that have been waiting for more
+ * than 2 seconds. This is long enough to be reasonably sure that the
+ * timestamp will never be captured. This might happen if the packet gets
+ * discarded before it reaches the PHY timestamping block.
+ */
+static void ice_ptp_tx_tstamp_cleanup(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+ struct ice_hw *hw = &pf->hw;
+ u8 idx;
+
+ if (!tx->init)
+ return;
+
+ for_each_set_bit(idx, tx->in_use, tx->len) {
+ struct sk_buff *skb;
+ u64 raw_tstamp;
+
+ /* Check if this SKB has been waiting for too long */
+ if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ))
+ continue;
+
+ /* Read tstamp to be able to use this register again */
+ ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset,
+ &raw_tstamp);
+
+ spin_lock(&tx->lock);
+ skb = tx->tstamps[idx].skb;
+ tx->tstamps[idx].skb = NULL;
+ clear_bit(idx, tx->in_use);
+ spin_unlock(&tx->lock);
+
+ /* Count the number of Tx timestamps which have timed out */
+ pf->ptp.tx_hwtstamp_timeouts++;
+
+ /* Free the SKB after we've cleared the bit */
+ dev_kfree_skb_any(skb);
+ }
+}
+
+/**
+ * ice_ptp_update_cached_phctime - Update the cached PHC time values
+ * @pf: Board specific private structure
+ *
+ * This function updates the system time values which are cached in the PF
+ * structure and the Rx rings.
+ *
+ * This function must be called periodically to ensure that the cached value
+ * is never more than 2 seconds old.
+ *
+ * Note that the cached copy in the PF PTP structure is always updated, even
+ * if we can't update the copy in the Rx rings.
+ *
+ * Return:
+ * * 0 - OK, successfully updated
+ * * -EAGAIN - PF was busy, need to reschedule the update
+ */
+static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ unsigned long update_before;
+ u64 systime;
+ int i;
+
+ update_before = pf->ptp.cached_phc_jiffies + msecs_to_jiffies(2000);
+ if (pf->ptp.cached_phc_time &&
+ time_is_before_jiffies(update_before)) {
+ unsigned long time_taken = jiffies - pf->ptp.cached_phc_jiffies;
+
+		dev_warn(dev, "%u msecs passed between updates to cached PHC time\n",
+ jiffies_to_msecs(time_taken));
+ pf->ptp.late_cached_phc_updates++;
+ }
+
+ /* Read the current PHC time */
+ systime = ice_ptp_read_src_clk_reg(pf, NULL);
+
+ /* Update the cached PHC time stored in the PF structure */
+ WRITE_ONCE(pf->ptp.cached_phc_time, systime);
+ WRITE_ONCE(pf->ptp.cached_phc_jiffies, jiffies);
+
+ if (test_and_set_bit(ICE_CFG_BUSY, pf->state))
+ return -EAGAIN;
+
+ ice_for_each_vsi(pf, i) {
+ struct ice_vsi *vsi = pf->vsi[i];
+ int j;
+
+ if (!vsi)
+ continue;
+
+ if (vsi->type != ICE_VSI_PF)
+ continue;
+
+ ice_for_each_rxq(vsi, j) {
+ if (!vsi->rx_rings[j])
+ continue;
+ WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
+ }
+ }
+ clear_bit(ICE_CFG_BUSY, pf->state);
+
+ return 0;
+}
+
+/**
+ * ice_ptp_reset_cached_phctime - Reset cached PHC time after an update
+ * @pf: Board specific private structure
+ *
+ * This function must be called when the cached PHC time is no longer valid,
+ * such as after a time adjustment. It discards any outstanding Tx timestamps,
+ * and updates the cached PHC time for both the PF and Rx rings. If updating
+ * the PHC time cannot be done immediately, a warning message is logged and
+ * the work item is scheduled.
+ *
+ * These steps are required in order to ensure that we do not accidentally
+ * report a timestamp extended by the wrong PHC cached copy. Note that we
+ * do not directly update the cached timestamp here because it is possible
+ * this might produce an error when ICE_CFG_BUSY is set. If this occurred, we
+ * would have to try again. During that time window, timestamps might be
+ * requested and returned with an invalid extension. Thus, on failure to
+ * immediately update the cached PHC time we would need to zero the value
+ * anyways. For this reason, we just zero the value immediately and queue the
+ * update work item.
+ */
+static void ice_ptp_reset_cached_phctime(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ int err;
+
+ /* Update the cached PHC time immediately if possible, otherwise
+ * schedule the work item to execute soon.
+ */
+ err = ice_ptp_update_cached_phctime(pf);
+ if (err) {
+ /* If another thread is updating the Rx rings, we won't
+ * properly reset them here. This could lead to reporting of
+ * invalid timestamps, but there isn't much we can do.
+ */
+ dev_warn(dev, "%s: ICE_CFG_BUSY, unable to immediately update cached PHC time\n",
+ __func__);
+
+ /* Queue the work item to update the Rx rings when possible */
+ kthread_queue_delayed_work(pf->ptp.kworker, &pf->ptp.work,
+ msecs_to_jiffies(10));
+ }
+
+ /* Flush any outstanding Tx timestamps */
+ ice_ptp_flush_tx_tracker(pf, &pf->ptp.port.tx);
+}
+
+/**
+ * ice_ptp_read_time - Read the time from the device
+ * @pf: Board private structure
+ * @ts: timespec structure to hold the current time value
+ * @sts: Optional parameter for holding a pair of system timestamps from
+ * the system clock. Will be ignored if NULL is given.
+ *
+ * This function reads the source clock registers. Since the registers hold
+ * 64 bits of nanoseconds, the result must be converted to a timespec before
+ * it can be returned.
+ */
+static void
+ice_ptp_read_time(struct ice_pf *pf, struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ u64 time_ns = ice_ptp_read_src_clk_reg(pf, sts);
+
+ *ts = ns_to_timespec64(time_ns);
+}
+
+/**
+ * ice_ptp_write_init - Set PHC time to provided value
+ * @pf: Board private structure
+ * @ts: timespec structure that holds the new time value
+ *
+ * Set the PHC time to the specified time provided in the timespec.
+ */
+static int ice_ptp_write_init(struct ice_pf *pf, struct timespec64 *ts)
+{
+ u64 ns = timespec64_to_ns(ts);
+ struct ice_hw *hw = &pf->hw;
+
+ return ice_ptp_init_time(hw, ns);
+}
+
+/**
+ * ice_ptp_write_adj - Adjust PHC clock time atomically
+ * @pf: Board private structure
+ * @adj: Adjustment in nanoseconds
+ *
+ * Perform an atomic adjustment of the PHC time by the specified number of
+ * nanoseconds.
+ */
+static int ice_ptp_write_adj(struct ice_pf *pf, s32 adj)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ return ice_ptp_adj_clock(hw, adj);
+}
+
+/**
+ * ice_base_incval - Get base timer increment value
+ * @pf: Board private structure
+ *
+ * Look up the base timer increment value for this device. The base increment
+ * value is used to define the nominal clock tick rate. This increment value
+ * is programmed during device initialization. It is also used as the basis
+ * for calculating adjustments using scaled_ppm.
+ */
+static u64 ice_base_incval(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ u64 incval;
+
+ if (ice_is_e810(hw))
+ incval = ICE_PTP_NOMINAL_INCVAL_E810;
+ else if (ice_e822_time_ref(hw) < NUM_ICE_TIME_REF_FREQ)
+ incval = ice_e822_nominal_incval(ice_e822_time_ref(hw));
+ else
+ incval = UNKNOWN_INCVAL_E822;
+
+ dev_dbg(ice_pf_to_dev(pf), "PTP: using base increment value of 0x%016llx\n",
+ incval);
+
+ return incval;
+}
+
+/**
+ * ice_ptp_reset_ts_memory_quad - Reset timestamp memory for one quad
+ * @pf: The PF private data structure
+ * @quad: The quad (0-4)
+ */
+static void ice_ptp_reset_ts_memory_quad(struct ice_pf *pf, int quad)
+{
+ struct ice_hw *hw = &pf->hw;
+
+ ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M);
+ ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M);
+}
+
+/**
+ * ice_ptp_check_tx_fifo - Check whether Tx FIFO is in an OK state
+ * @port: PTP port for which Tx FIFO is checked
+ */
+static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port)
+{
+ int quad = port->port_num / ICE_PORTS_PER_QUAD;
+ int offs = port->port_num % ICE_PORTS_PER_QUAD;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ u32 val, phy_sts;
+ int err;
+
+ pf = ptp_port_to_pf(port);
+ hw = &pf->hw;
+
+ if (port->tx_fifo_busy_cnt == FIFO_OK)
+ return 0;
+
+ /* need to read FIFO state */
+ if (offs == 0 || offs == 1)
+ err = ice_read_quad_reg_e822(hw, quad, Q_REG_FIFO01_STATUS,
+ &val);
+ else
+ err = ice_read_quad_reg_e822(hw, quad, Q_REG_FIFO23_STATUS,
+ &val);
+
+ if (err) {
+ dev_err(ice_pf_to_dev(pf), "PTP failed to check port %d Tx FIFO, err %d\n",
+ port->port_num, err);
+ return err;
+ }
+
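+	/* Ports 1 and 3 of the quad report their FIFO state in the FIFO13
+	 * field; ports 0 and 2 report in the FIFO02 field.
+	 */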
+ if (offs & 0x1)
+ phy_sts = (val & Q_REG_FIFO13_M) >> Q_REG_FIFO13_S;
+ else
+ phy_sts = (val & Q_REG_FIFO02_M) >> Q_REG_FIFO02_S;
+
+ if (phy_sts & FIFO_EMPTY) {
+ port->tx_fifo_busy_cnt = FIFO_OK;
+ return 0;
+ }
+
+ port->tx_fifo_busy_cnt++;
+
+ dev_dbg(ice_pf_to_dev(pf), "Try %d, port %d FIFO not empty\n",
+ port->tx_fifo_busy_cnt, port->port_num);
+
+ if (port->tx_fifo_busy_cnt == ICE_PTP_FIFO_NUM_CHECKS) {
+ dev_dbg(ice_pf_to_dev(pf),
+ "Port %d Tx FIFO still not empty; resetting quad %d\n",
+ port->port_num, quad);
+ ice_ptp_reset_ts_memory_quad(pf, quad);
+ port->tx_fifo_busy_cnt = FIFO_OK;
+ return 0;
+ }
+
+ return -EAGAIN;
+}
+
+/**
+ * ice_ptp_check_tx_offset_valid - Check if the Tx PHY offset is valid
+ * @port: the PTP port to check
+ *
+ * Checks whether the Tx offset for the PHY associated with this port is
+ * valid. Returns 0 if the offset is valid, and a non-zero error code if it is
+ * not.
+ */
+static int ice_ptp_check_tx_offset_valid(struct ice_ptp_port *port)
+{
+ struct ice_pf *pf = ptp_port_to_pf(port);
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+ int err;
+
+ err = ice_ptp_check_tx_fifo(port);
+ if (err)
+ return err;
+
+ err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_TX_OV_STATUS,
+ &val);
+ if (err) {
+ dev_err(dev, "Failed to read TX_OV_STATUS for port %d, err %d\n",
+ port->port_num, err);
+ return -EAGAIN;
+ }
+
+ if (!(val & P_REG_TX_OV_STATUS_OV_M))
+ return -EAGAIN;
+
+ return 0;
+}
+
+/**
+ * ice_ptp_check_rx_offset_valid - Check if the Rx PHY offset is valid
+ * @port: the PTP port to check
+ *
+ * Checks whether the Rx offset for the PHY associated with this port is
+ * valid. Returns 0 if the offset is valid, and a non-zero error code if it is
+ * not.
+ */
+static int ice_ptp_check_rx_offset_valid(struct ice_ptp_port *port)
+{
+ struct ice_pf *pf = ptp_port_to_pf(port);
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ int err;
+ u32 val;
+
+ err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_RX_OV_STATUS,
+ &val);
+ if (err) {
+ dev_err(dev, "Failed to read RX_OV_STATUS for port %d, err %d\n",
+ port->port_num, err);
+ return err;
+ }
+
+ if (!(val & P_REG_RX_OV_STATUS_OV_M))
+ return -EAGAIN;
+
+ return 0;
+}
+
+/**
+ * ice_ptp_check_offset_valid - Check port offset valid bit
+ * @port: Port for which offset valid bit is checked
+ *
+ * Returns 0 if both Tx and Rx offset are valid, and -EAGAIN if one of the
+ * offset is not ready.
+ */
+static int ice_ptp_check_offset_valid(struct ice_ptp_port *port)
+{
+ int tx_err, rx_err;
+
+ /* always check both Tx and Rx offset validity */
+ tx_err = ice_ptp_check_tx_offset_valid(port);
+ rx_err = ice_ptp_check_rx_offset_valid(port);
+
+ if (tx_err || rx_err)
+ return -EAGAIN;
+
+ return 0;
+}
+
+/**
+ * ice_ptp_wait_for_offset_valid - Check for valid Tx and Rx offsets
+ * @work: Pointer to the kthread_work structure for this task
+ *
+ * Check whether both the Tx and Rx offsets are valid for enabling the vernier
+ * calibration.
+ *
+ * Once we have valid offsets from hardware, update the total Tx and Rx
+ * offsets, and exit bypass mode. This enables more precise timestamps using
+ * the extra data measured during the vernier calibration process.
+ */
+static void ice_ptp_wait_for_offset_valid(struct kthread_work *work)
+{
+ struct ice_ptp_port *port;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int err;
+
+ port = container_of(work, struct ice_ptp_port, ov_work.work);
+ pf = ptp_port_to_pf(port);
+ hw = &pf->hw;
+ dev = ice_pf_to_dev(pf);
+
+ if (ice_is_reset_in_progress(pf->state))
+ return;
+
+ if (ice_ptp_check_offset_valid(port)) {
+ /* Offsets not ready yet, try again later */
+ kthread_queue_delayed_work(pf->ptp.kworker,
+ &port->ov_work,
+ msecs_to_jiffies(100));
+ return;
+ }
+
+ /* Offsets are valid, so it is safe to exit bypass mode */
+ err = ice_phy_exit_bypass_e822(hw, port->port_num);
+ if (err) {
+ dev_warn(dev, "Failed to exit bypass mode for PHY port %u, err %d\n",
+ port->port_num, err);
+ return;
+ }
+}
+
+/**
+ * ice_ptp_port_phy_stop - Stop timestamping for a PHY port
+ * @ptp_port: PTP port to stop
+ */
+static int
+ice_ptp_port_phy_stop(struct ice_ptp_port *ptp_port)
+{
+ struct ice_pf *pf = ptp_port_to_pf(ptp_port);
+ u8 port = ptp_port->port_num;
+ struct ice_hw *hw = &pf->hw;
+ int err;
+
+ if (ice_is_e810(hw))
+ return 0;
+
+ mutex_lock(&ptp_port->ps_lock);
+
+ kthread_cancel_delayed_work_sync(&ptp_port->ov_work);
+
+ err = ice_stop_phy_timer_e822(hw, port, true);
+ if (err)
+ dev_err(ice_pf_to_dev(pf), "PTP failed to set PHY port %d down, err %d\n",
+ port, err);
+
+ mutex_unlock(&ptp_port->ps_lock);
+
+ return err;
+}
+
+/**
+ * ice_ptp_port_phy_restart - (Re)start and calibrate PHY timestamping
+ * @ptp_port: PTP port for which the PHY start is set
+ *
+ * Start the PHY timestamping block, and initiate Vernier timestamping
+ * calibration. If timestamping cannot be calibrated (such as if link is down)
+ * then disable the timestamping block instead.
+ */
+static int
+ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port)
+{
+ struct ice_pf *pf = ptp_port_to_pf(ptp_port);
+ u8 port = ptp_port->port_num;
+ struct ice_hw *hw = &pf->hw;
+ int err;
+
+ if (ice_is_e810(hw))
+ return 0;
+
+ if (!ptp_port->link_up)
+ return ice_ptp_port_phy_stop(ptp_port);
+
+ mutex_lock(&ptp_port->ps_lock);
+
+ kthread_cancel_delayed_work_sync(&ptp_port->ov_work);
+
+ /* temporarily disable Tx timestamps while calibrating PHY offset */
+ ptp_port->tx.calibrating = true;
+ ptp_port->tx_fifo_busy_cnt = 0;
+
+ /* Start the PHY timer in bypass mode */
+ err = ice_start_phy_timer_e822(hw, port, true);
+ if (err)
+ goto out_unlock;
+
+ /* Enable Tx timestamps right away */
+ ptp_port->tx.calibrating = false;
+
+ kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0);
+
+out_unlock:
+ if (err)
+ dev_err(ice_pf_to_dev(pf), "PTP failed to set PHY port %d up, err %d\n",
+ port, err);
+
+ mutex_unlock(&ptp_port->ps_lock);
+
+ return err;
+}
+
+/**
+ * ice_ptp_link_change - Set or clear port registers for timestamping
+ * @pf: Board private structure
+ * @port: Port for which the PHY start is set
+ * @linkup: Link is up or down
+ */
+int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
+{
+ struct ice_ptp_port *ptp_port;
+
+ if (!test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+ return 0;
+
+ if (port >= ICE_NUM_EXTERNAL_PORTS)
+ return -EINVAL;
+
+ ptp_port = &pf->ptp.port;
+ if (ptp_port->port_num != port)
+ return -EINVAL;
+
+ /* Update cached link err for this port immediately */
+ ptp_port->link_up = linkup;
+
+ if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ /* PTP is not setup */
+ return -EAGAIN;
+
+ return ice_ptp_port_phy_restart(ptp_port);
+}
+
+/**
+ * ice_ptp_reset_ts_memory - Reset timestamp memory for all quads
+ * @pf: The PF private data structure
+ */
+static void ice_ptp_reset_ts_memory(struct ice_pf *pf)
+{
+ int quad;
+
+ quad = pf->hw.port_info->lport / ICE_PORTS_PER_QUAD;
+ ice_ptp_reset_ts_memory_quad(pf, quad);
+}
+
+/**
+ * ice_ptp_tx_ena_intr - Enable or disable the Tx timestamp interrupt
+ * @pf: PF private structure
+ * @ena: bool value to enable or disable interrupt
+ * @threshold: Minimum number of packets at which intr is triggered
+ *
+ * Utility function to enable or disable Tx timestamp interrupt and threshold
+ */
+static int ice_ptp_tx_ena_intr(struct ice_pf *pf, bool ena, u32 threshold)
+{
+ struct ice_hw *hw = &pf->hw;
+ int err = 0;
+ int quad;
+ u32 val;
+
+ ice_ptp_reset_ts_memory(pf);
+
+ for (quad = 0; quad < ICE_MAX_QUAD; quad++) {
+ err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG,
+ &val);
+ if (err)
+ break;
+
+ if (ena) {
+ val |= Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M;
+ val &= ~Q_REG_TX_MEM_GBL_CFG_INTR_THR_M;
+ val |= ((threshold << Q_REG_TX_MEM_GBL_CFG_INTR_THR_S) &
+ Q_REG_TX_MEM_GBL_CFG_INTR_THR_M);
+ } else {
+ val &= ~Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M;
+ }
+
+ err = ice_write_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG,
+ val);
+ if (err)
+ break;
+ }
+
+ if (err)
+ dev_err(ice_pf_to_dev(pf), "PTP failed in intr ena, err %d\n",
+ err);
+ return err;
+}
+
+/**
+ * ice_ptp_reset_phy_timestamping - Reset PHY timestamping block
+ * @pf: Board private structure
+ */
+static void ice_ptp_reset_phy_timestamping(struct ice_pf *pf)
+{
+ ice_ptp_port_phy_restart(&pf->ptp.port);
+}
+
+/**
+ * ice_ptp_adjfine - Adjust clock increment rate
+ * @info: the driver's PTP info structure
+ * @scaled_ppm: Parts per million with 16-bit fractional field
+ *
+ * Adjust the frequency of the clock by the indicated scaled ppm from the
+ * base frequency.
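+ *
+ * scaled_ppm carries a 16-bit fractional field, so a value of 65536
+ * represents 1 ppm. For that input, the diff computed below reduces to
+ * incval / 1000000, i.e. the increment value changes by one part per
+ * million.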
+ */
+static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm)
+{
+ struct ice_pf *pf = ptp_info_to_pf(info);
+ struct ice_hw *hw = &pf->hw;
+ u64 incval, diff;
+ int neg_adj = 0;
+ int err;
+
+ incval = ice_base_incval(pf);
+
+ if (scaled_ppm < 0) {
+ neg_adj = 1;
+ scaled_ppm = -scaled_ppm;
+ }
+
+ diff = mul_u64_u64_div_u64(incval, (u64)scaled_ppm,
+ 1000000ULL << 16);
+ if (neg_adj)
+ incval -= diff;
+ else
+ incval += diff;
+
+ err = ice_ptp_write_incval_locked(hw, incval);
+ if (err) {
+ dev_err(ice_pf_to_dev(pf), "PTP failed to set incval, err %d\n",
+ err);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_extts_event - Process PTP external clock event
+ * @pf: Board private structure
+ */
+void ice_ptp_extts_event(struct ice_pf *pf)
+{
+ struct ptp_clock_event event;
+ struct ice_hw *hw = &pf->hw;
+ u8 chan, tmr_idx;
+ u32 hi, lo;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+ /* Event time is captured by one of the two matched registers
+ * GLTSYN_EVNT_L: 32 LSB of sampled time event
+ * GLTSYN_EVNT_H: 32 MSB of sampled time event
+ * Event is defined in GLTSYN_EVNT_0 register
+ */
+ for (chan = 0; chan < GLTSYN_EVNT_H_IDX_MAX; chan++) {
+ /* Check if channel is enabled */
+ if (pf->ptp.ext_ts_irq & (1 << chan)) {
+ lo = rd32(hw, GLTSYN_EVNT_L(chan, tmr_idx));
+ hi = rd32(hw, GLTSYN_EVNT_H(chan, tmr_idx));
+ event.timestamp = (((u64)hi) << 32) | lo;
+ event.type = PTP_CLOCK_EXTTS;
+ event.index = chan;
+
+ /* Fire event */
+ ptp_clock_event(pf->ptp.clock, &event);
+ pf->ptp.ext_ts_irq &= ~(1 << chan);
+ }
+ }
+}
+
+/**
+ * ice_ptp_cfg_extts - Configure EXTTS pin and channel
+ * @pf: Board private structure
+ * @ena: true to enable; false to disable
+ * @chan: GPIO channel (0-3)
+ * @gpio_pin: GPIO pin
+ * @extts_flags: request flags from the ptp_extts_request.flags
+ */
+static int
+ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin,
+ unsigned int extts_flags)
+{
+ u32 func, aux_reg, gpio_reg, irq_reg;
+ struct ice_hw *hw = &pf->hw;
+ u8 tmr_idx;
+
+ if (chan > (unsigned int)pf->ptp.info.n_ext_ts)
+ return -EINVAL;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+ irq_reg = rd32(hw, PFINT_OICR_ENA);
+
+ if (ena) {
+ /* Enable the interrupt */
+ irq_reg |= PFINT_OICR_TSYN_EVNT_M;
+ aux_reg = GLTSYN_AUX_IN_0_INT_ENA_M;
+
+#define GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE BIT(0)
+#define GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE BIT(1)
+
+ /* set event level to requested edge */
+ if (extts_flags & PTP_FALLING_EDGE)
+ aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE;
+ if (extts_flags & PTP_RISING_EDGE)
+ aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE;
+
+ /* Write GPIO CTL reg.
+ * 0x1 is input sampled by EVENT register(channel)
+ * + num_in_channels * tmr_idx
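+		 * (for example, chan 0 on tmr_idx 1 selects function 4)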
+ */
+ func = 1 + chan + (tmr_idx * 3);
+ gpio_reg = ((func << GLGEN_GPIO_CTL_PIN_FUNC_S) &
+ GLGEN_GPIO_CTL_PIN_FUNC_M);
+ pf->ptp.ext_ts_chan |= (1 << chan);
+ } else {
+ /* clear the values we set to reset defaults */
+ aux_reg = 0;
+ gpio_reg = 0;
+ pf->ptp.ext_ts_chan &= ~(1 << chan);
+ if (!pf->ptp.ext_ts_chan)
+ irq_reg &= ~PFINT_OICR_TSYN_EVNT_M;
+ }
+
+ wr32(hw, PFINT_OICR_ENA, irq_reg);
+ wr32(hw, GLTSYN_AUX_IN(chan, tmr_idx), aux_reg);
+ wr32(hw, GLGEN_GPIO_CTL(gpio_pin), gpio_reg);
+
+ return 0;
+}
+
+/**
+ * ice_ptp_cfg_clkout - Configure clock to generate periodic wave
+ * @pf: Board private structure
+ * @chan: GPIO channel (0-3)
+ * @config: desired periodic clk configuration. NULL will disable channel
+ * @store: If set to true the values will be stored
+ *
+ * Configure the internal clock generator modules to generate the clock wave of
+ * specified period.
+ */
+static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan,
+ struct ice_perout_channel *config, bool store)
+{
+ u64 current_time, period, start_time, phase;
+ struct ice_hw *hw = &pf->hw;
+ u32 func, val, gpio_pin;
+ u8 tmr_idx;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+ /* 0. Reset mode & out_en in AUX_OUT */
+ wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), 0);
+
+ /* If we're disabling the output, clear out CLKO and TGT and keep
+ * output level low
+ */
+ if (!config || !config->ena) {
+ wr32(hw, GLTSYN_CLKO(chan, tmr_idx), 0);
+ wr32(hw, GLTSYN_TGT_L(chan, tmr_idx), 0);
+ wr32(hw, GLTSYN_TGT_H(chan, tmr_idx), 0);
+
+ val = GLGEN_GPIO_CTL_PIN_DIR_M;
+ gpio_pin = pf->ptp.perout_channels[chan].gpio_pin;
+ wr32(hw, GLGEN_GPIO_CTL(gpio_pin), val);
+
+ /* Store the value if requested */
+ if (store)
+ memset(&pf->ptp.perout_channels[chan], 0,
+ sizeof(struct ice_perout_channel));
+
+ return 0;
+ }
+ period = config->period;
+ start_time = config->start_time;
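+	/* phase is the offset of start_time within one period; it is kept so
+	 * that the output stays aligned if start_time must be pushed into the
+	 * future below
+	 */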
+ div64_u64_rem(start_time, period, &phase);
+ gpio_pin = config->gpio_pin;
+
+ /* 1. Write clkout with half of required period value */
+ if (period & 0x1) {
+ dev_err(ice_pf_to_dev(pf), "CLK Period must be an even value\n");
+ goto err;
+ }
+
+ period >>= 1;
+
+	/* For proper operation, GLTSYN_CLKO must be larger than one clock tick */
+#define MIN_PULSE 3
+ if (period <= MIN_PULSE || period > U32_MAX) {
+ dev_err(ice_pf_to_dev(pf), "CLK Period must be > %d && < 2^33",
+ MIN_PULSE * 2);
+ goto err;
+ }
+
+ wr32(hw, GLTSYN_CLKO(chan, tmr_idx), lower_32_bits(period));
+
+ /* Allow time for programming before start_time is hit */
+ current_time = ice_ptp_read_src_clk_reg(pf, NULL);
+
+	/* if the start time is in the past, start the timer at the next
+	 * second boundary, maintaining phase
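+	 * (for example, a current time of 1.25 s with a 1 s period and zero
+	 * phase starts the output at the 2 s boundary)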
+ */
+ if (start_time < current_time)
+ start_time = div64_u64(current_time + NSEC_PER_SEC - 1,
+ NSEC_PER_SEC) * NSEC_PER_SEC + phase;
+
+ if (ice_is_e810(hw))
+ start_time -= E810_OUT_PROP_DELAY_NS;
+ else
+ start_time -= ice_e822_pps_delay(ice_e822_time_ref(hw));
+
+ /* 2. Write TARGET time */
+ wr32(hw, GLTSYN_TGT_L(chan, tmr_idx), lower_32_bits(start_time));
+ wr32(hw, GLTSYN_TGT_H(chan, tmr_idx), upper_32_bits(start_time));
+
+ /* 3. Write AUX_OUT register */
+ val = GLTSYN_AUX_OUT_0_OUT_ENA_M | GLTSYN_AUX_OUT_0_OUTMOD_M;
+ wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), val);
+
+ /* 4. write GPIO CTL reg */
+ func = 8 + chan + (tmr_idx * 4);
+ val = GLGEN_GPIO_CTL_PIN_DIR_M |
+ ((func << GLGEN_GPIO_CTL_PIN_FUNC_S) & GLGEN_GPIO_CTL_PIN_FUNC_M);
+ wr32(hw, GLGEN_GPIO_CTL(gpio_pin), val);
+
+ /* Store the value if requested */
+ if (store) {
+ memcpy(&pf->ptp.perout_channels[chan], config,
+ sizeof(struct ice_perout_channel));
+ pf->ptp.perout_channels[chan].start_time = phase;
+ }
+
+ return 0;
+err:
+ dev_err(ice_pf_to_dev(pf), "PTP failed to cfg per_clk\n");
+ return -EFAULT;
+}
+
+/**
+ * ice_ptp_disable_all_clkout - Disable all currently configured outputs
+ * @pf: pointer to the PF structure
+ *
+ * Disable all currently configured clock outputs. This is necessary before
+ * certain changes to the PTP hardware clock. Use ice_ptp_enable_all_clkout to
+ * re-enable the clocks again.
+ */
+static void ice_ptp_disable_all_clkout(struct ice_pf *pf)
+{
+ uint i;
+
+ for (i = 0; i < pf->ptp.info.n_per_out; i++)
+ if (pf->ptp.perout_channels[i].ena)
+ ice_ptp_cfg_clkout(pf, i, NULL, false);
+}
+
+/**
+ * ice_ptp_enable_all_clkout - Enable all configured periodic clock outputs
+ * @pf: pointer to the PF structure
+ *
+ * Enable all currently configured clock outputs. Use this after
+ * ice_ptp_disable_all_clkout to reconfigure the output signals according to
+ * their configuration.
+ */
+static void ice_ptp_enable_all_clkout(struct ice_pf *pf)
+{
+ uint i;
+
+ for (i = 0; i < pf->ptp.info.n_per_out; i++)
+ if (pf->ptp.perout_channels[i].ena)
+ ice_ptp_cfg_clkout(pf, i, &pf->ptp.perout_channels[i],
+ false);
+}
+
+/**
+ * ice_ptp_gpio_enable_e810 - Enable/disable ancillary features of PHC
+ * @info: the driver's PTP info structure
+ * @rq: The requested feature to change
+ * @on: Enable/disable flag
+ */
+static int
+ice_ptp_gpio_enable_e810(struct ptp_clock_info *info,
+ struct ptp_clock_request *rq, int on)
+{
+ struct ice_pf *pf = ptp_info_to_pf(info);
+ struct ice_perout_channel clk_cfg = {0};
+ bool sma_pres = false;
+ unsigned int chan;
+ u32 gpio_pin;
+ int err;
+
+ if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL))
+ sma_pres = true;
+
+ switch (rq->type) {
+ case PTP_CLK_REQ_PEROUT:
+ chan = rq->perout.index;
+ if (sma_pres) {
+ if (chan == ice_pin_desc_e810t[SMA1].chan)
+ clk_cfg.gpio_pin = GPIO_20;
+ else if (chan == ice_pin_desc_e810t[SMA2].chan)
+ clk_cfg.gpio_pin = GPIO_22;
+ else
+ return -1;
+ } else if (ice_is_e810t(&pf->hw)) {
+ if (chan == 0)
+ clk_cfg.gpio_pin = GPIO_20;
+ else
+ clk_cfg.gpio_pin = GPIO_22;
+ } else if (chan == PPS_CLK_GEN_CHAN) {
+ clk_cfg.gpio_pin = PPS_PIN_INDEX;
+ } else {
+ clk_cfg.gpio_pin = chan;
+ }
+
+ clk_cfg.period = ((rq->perout.period.sec * NSEC_PER_SEC) +
+ rq->perout.period.nsec);
+ clk_cfg.start_time = ((rq->perout.start.sec * NSEC_PER_SEC) +
+ rq->perout.start.nsec);
+ clk_cfg.ena = !!on;
+
+ err = ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true);
+ break;
+ case PTP_CLK_REQ_EXTTS:
+ chan = rq->extts.index;
+ if (sma_pres) {
+ if (chan < ice_pin_desc_e810t[SMA2].chan)
+ gpio_pin = GPIO_21;
+ else
+ gpio_pin = GPIO_23;
+ } else if (ice_is_e810t(&pf->hw)) {
+ if (chan == 0)
+ gpio_pin = GPIO_21;
+ else
+ gpio_pin = GPIO_23;
+ } else {
+ gpio_pin = chan;
+ }
+
+ err = ice_ptp_cfg_extts(pf, !!on, chan, gpio_pin,
+ rq->extts.flags);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+/**
+ * ice_ptp_gettimex64 - Get the time of the clock
+ * @info: the driver's PTP info structure
+ * @ts: timespec64 structure to hold the current time value
+ * @sts: Optional parameter for holding a pair of system timestamps from
+ * the system clock. Will be ignored if NULL is given.
+ *
+ * Read the device clock and return the correct value in ns, after converting it
+ * into a timespec struct.
+ */
+static int
+ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ struct ice_pf *pf = ptp_info_to_pf(info);
+ struct ice_hw *hw = &pf->hw;
+
+ if (!ice_ptp_lock(hw)) {
+ dev_err(ice_pf_to_dev(pf), "PTP failed to get time\n");
+ return -EBUSY;
+ }
+
+ ice_ptp_read_time(pf, ts, sts);
+ ice_ptp_unlock(hw);
+
+ return 0;
+}
+
+/**
+ * ice_ptp_settime64 - Set the time of the clock
+ * @info: the driver's PTP info structure
+ * @ts: timespec64 structure that holds the new time value
+ *
+ * Set the device clock to the user input value. The conversion from timespec
+ * to ns happens in the write function.
+ */
+static int
+ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts)
+{
+ struct ice_pf *pf = ptp_info_to_pf(info);
+ struct timespec64 ts64 = *ts;
+ struct ice_hw *hw = &pf->hw;
+ int err;
+
+	/* For Vernier mode, we need to recalibrate after a new settime.
+	 * Start by disabling the timestamp block.
+	 */
+ if (pf->ptp.port.link_up)
+ ice_ptp_port_phy_stop(&pf->ptp.port);
+
+ if (!ice_ptp_lock(hw)) {
+ err = -EBUSY;
+ goto exit;
+ }
+
+ /* Disable periodic outputs */
+ ice_ptp_disable_all_clkout(pf);
+
+ err = ice_ptp_write_init(pf, &ts64);
+ ice_ptp_unlock(hw);
+
+ if (!err)
+ ice_ptp_reset_cached_phctime(pf);
+
+ /* Reenable periodic outputs */
+ ice_ptp_enable_all_clkout(pf);
+
+ /* Recalibrate and re-enable timestamp block */
+ if (pf->ptp.port.link_up)
+ ice_ptp_port_phy_restart(&pf->ptp.port);
+exit:
+ if (err) {
+ dev_err(ice_pf_to_dev(pf), "PTP failed to set time %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_adjtime_nonatomic - Do a non-atomic clock adjustment
+ * @info: the driver's PTP info structure
+ * @delta: Offset in nanoseconds to adjust the time by
+ */
+static int ice_ptp_adjtime_nonatomic(struct ptp_clock_info *info, s64 delta)
+{
+ struct timespec64 now, then;
+ int ret;
+
+ then = ns_to_timespec64(delta);
+ ret = ice_ptp_gettimex64(info, &now, NULL);
+ if (ret)
+ return ret;
+ now = timespec64_add(now, then);
+
+ return ice_ptp_settime64(info, (const struct timespec64 *)&now);
+}
+
+/**
+ * ice_ptp_adjtime - Adjust the time of the clock by the indicated delta
+ * @info: the driver's PTP info structure
+ * @delta: Offset in nanoseconds to adjust the time by
+ */
+static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta)
+{
+ struct ice_pf *pf = ptp_info_to_pf(info);
+ struct ice_hw *hw = &pf->hw;
+ struct device *dev;
+ int err;
+
+ dev = ice_pf_to_dev(pf);
+
+ /* Hardware only supports atomic adjustments using signed 32-bit
+ * integers. For any adjustment outside this range, perform
+ * a non-atomic get->adjust->set flow.
+ */
+ if (delta > S32_MAX || delta < S32_MIN) {
+ dev_dbg(dev, "delta = %lld, adjtime non-atomic\n", delta);
+ return ice_ptp_adjtime_nonatomic(info, delta);
+ }
+
+ if (!ice_ptp_lock(hw)) {
+ dev_err(dev, "PTP failed to acquire semaphore in adjtime\n");
+ return -EBUSY;
+ }
+
+ /* Disable periodic outputs */
+ ice_ptp_disable_all_clkout(pf);
+
+ err = ice_ptp_write_adj(pf, delta);
+
+ /* Reenable periodic outputs */
+ ice_ptp_enable_all_clkout(pf);
+
+ ice_ptp_unlock(hw);
+
+ if (err) {
+ dev_err(dev, "PTP failed to adjust time, err %d\n", err);
+ return err;
+ }
+
+ ice_ptp_reset_cached_phctime(pf);
+
+ return 0;
+}
+
+#ifdef CONFIG_ICE_HWTS
+/**
+ * ice_ptp_get_syncdevicetime - Get the cross time stamp info
+ * @device: Current device time
+ * @system: System counter value read synchronously with device time
+ * @ctx: Context provided by timekeeping code
+ *
+ * Read device and system (ART) clock simultaneously and return the corrected
+ * clock values in ns.
+ */
+static int
+ice_ptp_get_syncdevicetime(ktime_t *device,
+ struct system_counterval_t *system,
+ void *ctx)
+{
+ struct ice_pf *pf = (struct ice_pf *)ctx;
+ struct ice_hw *hw = &pf->hw;
+ u32 hh_lock, hh_art_ctl;
+ int i;
+
+ /* Get the HW lock */
+ hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+ if (hh_lock & PFHH_SEM_BUSY_M) {
+ dev_err(ice_pf_to_dev(pf), "PTP failed to get hh lock\n");
+ return -EFAULT;
+ }
+
+ /* Start the ART and device clock sync sequence */
+ hh_art_ctl = rd32(hw, GLHH_ART_CTL);
+ hh_art_ctl = hh_art_ctl | GLHH_ART_CTL_ACTIVE_M;
+ wr32(hw, GLHH_ART_CTL, hh_art_ctl);
+
+#define MAX_HH_LOCK_TRIES 100
+
+ for (i = 0; i < MAX_HH_LOCK_TRIES; i++) {
+ /* Wait for sync to complete */
+ hh_art_ctl = rd32(hw, GLHH_ART_CTL);
+ if (hh_art_ctl & GLHH_ART_CTL_ACTIVE_M) {
+ udelay(1);
+ continue;
+ } else {
+ u32 hh_ts_lo, hh_ts_hi, tmr_idx;
+ u64 hh_ts;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+ /* Read ART time */
+ hh_ts_lo = rd32(hw, GLHH_ART_TIME_L);
+ hh_ts_hi = rd32(hw, GLHH_ART_TIME_H);
+ hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo;
+ *system = convert_art_ns_to_tsc(hh_ts);
+ /* Read Device source clock time */
+ hh_ts_lo = rd32(hw, GLTSYN_HHTIME_L(tmr_idx));
+ hh_ts_hi = rd32(hw, GLTSYN_HHTIME_H(tmr_idx));
+ hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo;
+ *device = ns_to_ktime(hh_ts);
+ break;
+ }
+ }
+ /* Release HW lock */
+ hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+ hh_lock = hh_lock & ~PFHH_SEM_BUSY_M;
+ wr32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), hh_lock);
+
+ if (i == MAX_HH_LOCK_TRIES)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+/**
+ * ice_ptp_getcrosststamp_e822 - Capture a device cross timestamp
+ * @info: the driver's PTP info structure
+ * @cts: The memory to fill the cross timestamp info
+ *
+ * Capture a cross timestamp between the ART and the device PTP hardware
+ * clock. Fill the cross timestamp information and report it back to the
+ * caller.
+ *
+ * This is only valid for E822 devices which have support for generating the
+ * cross timestamp via PCIe PTM.
+ *
+ * In order to correctly correlate the ART timestamp back to the TSC time, the
+ * CPU must have X86_FEATURE_TSC_KNOWN_FREQ.
+ */
+static int
+ice_ptp_getcrosststamp_e822(struct ptp_clock_info *info,
+ struct system_device_crosststamp *cts)
+{
+ struct ice_pf *pf = ptp_info_to_pf(info);
+
+ return get_device_system_crosststamp(ice_ptp_get_syncdevicetime,
+ pf, NULL, cts);
+}
+#endif /* CONFIG_ICE_HWTS */
+
+/**
+ * ice_ptp_get_ts_config - ioctl interface to read the timestamping config
+ * @pf: Board private structure
+ * @ifr: ioctl data
+ *
+ * Copy the timestamping config to user buffer
+ */
+int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+ struct hwtstamp_config *config;
+
+ if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ return -EIO;
+
+ config = &pf->ptp.tstamp_config;
+
+ return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+ -EFAULT : 0;
+}
+
+/**
+ * ice_ptp_set_timestamp_mode - Setup driver for requested timestamp mode
+ * @pf: Board private structure
+ * @config: hwtstamp settings requested or saved
+ */
+static int
+ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config)
+{
+ switch (config->tx_type) {
+ case HWTSTAMP_TX_OFF:
+ ice_set_tx_tstamp(pf, false);
+ break;
+ case HWTSTAMP_TX_ON:
+ ice_set_tx_tstamp(pf, true);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (config->rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ ice_set_rx_tstamp(pf, false);
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ case HWTSTAMP_FILTER_ALL:
+ ice_set_rx_tstamp(pf, true);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_set_ts_config - ioctl interface to control the timestamping
+ * @pf: Board private structure
+ * @ifr: ioctl data
+ *
+ * Get the user config and store it
+ */
+int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ int err;
+
+ if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ return -EAGAIN;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ err = ice_ptp_set_timestamp_mode(pf, &config);
+ if (err)
+ return err;
+
+ /* Return the actual configuration set */
+ config = pf->ptp.tstamp_config;
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
+
+/**
+ * ice_ptp_rx_hwtstamp - Check for an Rx timestamp
+ * @rx_ring: Ring to get the VSI info
+ * @rx_desc: Receive descriptor
+ * @skb: Particular skb to send timestamp with
+ *
+ * The driver receives notification of a captured timestamp in the receive
+ * descriptor. The descriptor provides only 32 bits of nominal nanoseconds, so
+ * the value must be extended to a full 64-bit timestamp before it is reported.
+ */
+void
+ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb)
+{
+ struct skb_shared_hwtstamps *hwtstamps;
+ u64 ts_ns, cached_time;
+ u32 ts_high;
+
+ if (!(rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID))
+ return;
+
+ cached_time = READ_ONCE(rx_ring->cached_phctime);
+
+ /* Do not report a timestamp if we don't have a cached PHC time */
+ if (!cached_time)
+ return;
+
+ /* Use ice_ptp_extend_32b_ts directly, using the ring-specific cached
+ * PHC value, rather than accessing the PF. This also allows us to
+ * simply pass the upper 32bits of nanoseconds directly. Calling
+ * ice_ptp_extend_40b_ts is unnecessary as it would just discard these
+ * bits itself.
+ */
+ ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high);
+ ts_ns = ice_ptp_extend_32b_ts(cached_time, ts_high);
+
+ hwtstamps = skb_hwtstamps(skb);
+ memset(hwtstamps, 0, sizeof(*hwtstamps));
+ hwtstamps->hwtstamp = ns_to_ktime(ts_ns);
+}
+
+/**
+ * ice_ptp_disable_sma_pins_e810t - Disable E810-T SMA pins
+ * @pf: pointer to the PF structure
+ * @info: PTP clock info structure
+ *
+ * Disable the OS access to the SMA pins. Called to clear out the OS
+ * indications of pin support when we fail to setup the E810-T SMA control
+ * register.
+ */
+static void
+ice_ptp_disable_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+
+ dev_warn(dev, "Failed to configure E810-T SMA pin control\n");
+
+ info->enable = NULL;
+ info->verify = NULL;
+ info->n_pins = 0;
+ info->n_ext_ts = 0;
+ info->n_per_out = 0;
+}
+
+/**
+ * ice_ptp_setup_sma_pins_e810t - Setup the SMA pins
+ * @pf: pointer to the PF structure
+ * @info: PTP clock info structure
+ *
+ * Finish setting up the SMA pins by allocating pin_config, and setting it up
+ * according to the current status of the SMA. On failure, disable all of the
+ * extended SMA pin support.
+ */
+static void
+ice_ptp_setup_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ int err;
+
+ /* Allocate memory for kernel pins interface */
+ info->pin_config = devm_kcalloc(dev, info->n_pins,
+ sizeof(*info->pin_config), GFP_KERNEL);
+ if (!info->pin_config) {
+ ice_ptp_disable_sma_pins_e810t(pf, info);
+ return;
+ }
+
+ /* Read current SMA status */
+ err = ice_get_sma_config_e810t(&pf->hw, info->pin_config);
+ if (err)
+ ice_ptp_disable_sma_pins_e810t(pf, info);
+}
+
+/**
+ * ice_ptp_setup_pins_e810 - Setup PTP pins in sysfs
+ * @pf: pointer to the PF instance
+ * @info: PTP clock capabilities
+ */
+static void
+ice_ptp_setup_pins_e810(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+ info->n_per_out = N_PER_OUT_E810;
+
+ if (ice_is_feature_supported(pf, ICE_F_PTP_EXTTS))
+ info->n_ext_ts = N_EXT_TS_E810;
+
+ if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) {
+ info->n_ext_ts = N_EXT_TS_E810;
+ info->n_pins = NUM_PTP_PINS_E810T;
+ info->verify = ice_verify_pin_e810t;
+
+ /* Complete setup of the SMA pins */
+ ice_ptp_setup_sma_pins_e810t(pf, info);
+ }
+}
+
+/**
+ * ice_ptp_set_funcs_e822 - Set specialized functions for E822 support
+ * @pf: Board private structure
+ * @info: PTP info to fill
+ *
+ * Assign functions to the PTP capabilities structure for E822 devices.
+ * Functions which operate across all device families should be set directly
+ * in ice_ptp_set_caps. Only add functions here which are distinct for E822
+ * devices.
+ */
+static void
+ice_ptp_set_funcs_e822(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+#ifdef CONFIG_ICE_HWTS
+ if (boot_cpu_has(X86_FEATURE_ART) &&
+ boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ))
+ info->getcrosststamp = ice_ptp_getcrosststamp_e822;
+#endif /* CONFIG_ICE_HWTS */
+}
+
+/**
+ * ice_ptp_set_funcs_e810 - Set specialized functions for E810 support
+ * @pf: Board private structure
+ * @info: PTP info to fill
+ *
+ * Assign functions to the PTP capabilities structure for E810 devices.
+ * Functions which operate across all device families should be set directly
+ * in ice_ptp_set_caps. Only add functions here which are distinct for E810
+ * devices.
+ */
+static void
+ice_ptp_set_funcs_e810(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+ info->enable = ice_ptp_gpio_enable_e810;
+ ice_ptp_setup_pins_e810(pf, info);
+}
+
+/**
+ * ice_ptp_set_caps - Set PTP capabilities
+ * @pf: Board private structure
+ */
+static void ice_ptp_set_caps(struct ice_pf *pf)
+{
+ struct ptp_clock_info *info = &pf->ptp.info;
+ struct device *dev = ice_pf_to_dev(pf);
+
+ snprintf(info->name, sizeof(info->name) - 1, "%s-%s-clk",
+ dev_driver_string(dev), dev_name(dev));
+ info->owner = THIS_MODULE;
+ info->max_adj = 100000000;
+ info->adjtime = ice_ptp_adjtime;
+ info->adjfine = ice_ptp_adjfine;
+ info->gettimex64 = ice_ptp_gettimex64;
+ info->settime64 = ice_ptp_settime64;
+
+ if (ice_is_e810(&pf->hw))
+ ice_ptp_set_funcs_e810(pf, info);
+ else
+ ice_ptp_set_funcs_e822(pf, info);
+}
+
+/**
+ * ice_ptp_create_clock - Create PTP clock device for userspace
+ * @pf: Board private structure
+ *
+ * This function creates a new PTP clock device if one does not already
+ * exist. It returns an error if the clock cannot be created, and success
+ * if a device already exists. It should be used by ice_ptp_init to create
+ * the clock initially, and prevents global resets from creating duplicate
+ * clock devices.
+ */
+static long ice_ptp_create_clock(struct ice_pf *pf)
+{
+ struct ptp_clock_info *info;
+ struct ptp_clock *clock;
+ struct device *dev;
+
+ /* No need to create a clock device if we already have one */
+ if (pf->ptp.clock)
+ return 0;
+
+ ice_ptp_set_caps(pf);
+
+ info = &pf->ptp.info;
+ dev = ice_pf_to_dev(pf);
+
+ /* Attempt to register the clock before enabling the hardware. */
+ clock = ptp_clock_register(info, dev);
+ if (IS_ERR(clock))
+ return PTR_ERR(clock);
+
+ pf->ptp.clock = clock;
+
+ return 0;
+}
+
+/**
+ * ice_ptp_request_ts - Request an available Tx timestamp index
+ * @tx: the PTP Tx timestamp tracker to request from
+ * @skb: the SKB to associate with this timestamp request
+ */
+s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
+{
+ u8 idx;
+
+ /* Check if this tracker is initialized */
+ if (!tx->init || tx->calibrating)
+ return -1;
+
+ spin_lock(&tx->lock);
+ /* Find and set the first available index */
+ idx = find_first_zero_bit(tx->in_use, tx->len);
+ if (idx < tx->len) {
+ /* We got a valid index that no other thread could have set. Store
+ * a reference to the skb and the start time to allow discarding old
+ * requests.
+ */
+ set_bit(idx, tx->in_use);
+ tx->tstamps[idx].start = jiffies;
+ tx->tstamps[idx].skb = skb_get(skb);
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ ice_trace(tx_tstamp_request, skb, idx);
+ }
+
+ spin_unlock(&tx->lock);
+
+ /* Return the appropriate PHY timestamp register index, or -1 if no
+ * index was available.
+ */
+ if (idx >= tx->len)
+ return -1;
+ else
+ return idx + tx->quad_offset;
+}
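+
+/* A minimal caller sketch (hypothetical, for illustration only): the
+ * transmit hot path would request an index before posting the descriptor
+ * and fall back to an untimestamped send when the tracker is full:
+ *
+ *	s8 idx = ice_ptp_request_ts(&pf->ptp.port.tx, skb);
+ *
+ *	if (idx >= 0) {
+ *		// program idx into the Tx descriptor timestamp field
+ *	} else {
+ *		pf->ptp.tx_hwtstamp_skipped++;
+ *	}
+ */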
+
+/**
+ * ice_ptp_process_ts - Process the PTP Tx timestamps
+ * @pf: Board private structure
+ *
+ * Returns true if timestamps are processed.
+ */
+bool ice_ptp_process_ts(struct ice_pf *pf)
+{
+ return ice_ptp_tx_tstamp(&pf->ptp.port.tx);
+}
+
+static void ice_ptp_periodic_work(struct kthread_work *work)
+{
+ struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work);
+ struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp);
+ int err;
+
+ if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ return;
+
+ err = ice_ptp_update_cached_phctime(pf);
+
+ ice_ptp_tx_tstamp_cleanup(pf, &pf->ptp.port.tx);
+
+ /* Run twice a second, or reschedule sooner if the PHC update failed */
+ kthread_queue_delayed_work(ptp->kworker, &ptp->work,
+ msecs_to_jiffies(err ? 10 : 500));
+}
+
+/**
+ * ice_ptp_reset - Initialize PTP hardware clock support after reset
+ * @pf: Board private structure
+ */
+void ice_ptp_reset(struct ice_pf *pf)
+{
+ struct ice_ptp *ptp = &pf->ptp;
+ struct ice_hw *hw = &pf->hw;
+ struct timespec64 ts;
+ int err, itr = 1;
+ u64 time_diff;
+
+ if (test_bit(ICE_PFR_REQ, pf->state))
+ goto pfr;
+
+ if (!hw->func_caps.ts_func_info.src_tmr_owned)
+ goto reset_ts;
+
+ err = ice_ptp_init_phc(hw);
+ if (err)
+ goto err;
+
+ /* Acquire the global hardware lock */
+ if (!ice_ptp_lock(hw)) {
+ err = -EBUSY;
+ goto err;
+ }
+
+ /* Write the increment time value to PHY and LAN */
+ err = ice_ptp_write_incval(hw, ice_base_incval(pf));
+ if (err) {
+ ice_ptp_unlock(hw);
+ goto err;
+ }
+
+ /* Write the initial Time value to PHY and LAN using the cached PHC
+ * time before the reset and time difference between stopping and
+ * starting the clock.
+ */
+ if (ptp->cached_phc_time) {
+ time_diff = ktime_get_real_ns() - ptp->reset_time;
+ ts = ns_to_timespec64(ptp->cached_phc_time + time_diff);
+ } else {
+ ts = ktime_to_timespec64(ktime_get_real());
+ }
+ err = ice_ptp_write_init(pf, &ts);
+ if (err) {
+ ice_ptp_unlock(hw);
+ goto err;
+ }
+
+ /* Release the global hardware lock */
+ ice_ptp_unlock(hw);
+
+ if (!ice_is_e810(hw)) {
+ /* Enable quad interrupts */
+ err = ice_ptp_tx_ena_intr(pf, true, itr);
+ if (err)
+ goto err;
+ }
+
+reset_ts:
+ /* Restart the PHY timestamping block */
+ ice_ptp_reset_phy_timestamping(pf);
+
+pfr:
+ /* Init Tx structures */
+ if (ice_is_e810(&pf->hw)) {
+ err = ice_ptp_init_tx_e810(pf, &ptp->port.tx);
+ } else {
+ kthread_init_delayed_work(&ptp->port.ov_work,
+ ice_ptp_wait_for_offset_valid);
+ err = ice_ptp_init_tx_e822(pf, &ptp->port.tx,
+ ptp->port.port_num);
+ }
+ if (err)
+ goto err;
+
+ set_bit(ICE_FLAG_PTP, pf->flags);
+
+ /* Start periodic work going */
+ kthread_queue_delayed_work(ptp->kworker, &ptp->work, 0);
+
+ dev_info(ice_pf_to_dev(pf), "PTP reset successful\n");
+ return;
+
+err:
+ dev_err(ice_pf_to_dev(pf), "PTP reset failed %d\n", err);
+}
+
+/**
+ * ice_ptp_prepare_for_reset - Prepare PTP for reset
+ * @pf: Board private structure
+ */
+void ice_ptp_prepare_for_reset(struct ice_pf *pf)
+{
+ struct ice_ptp *ptp = &pf->ptp;
+ u8 src_tmr;
+
+ clear_bit(ICE_FLAG_PTP, pf->flags);
+
+ /* Disable timestamping for both Tx and Rx */
+ ice_ptp_cfg_timestamp(pf, false);
+
+ kthread_cancel_delayed_work_sync(&ptp->work);
+
+ if (test_bit(ICE_PFR_REQ, pf->state))
+ return;
+
+ ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
+
+ /* Disable periodic outputs */
+ ice_ptp_disable_all_clkout(pf);
+
+ src_tmr = ice_get_ptp_src_clock_index(&pf->hw);
+
+ /* Disable source clock */
+ wr32(&pf->hw, GLTSYN_ENA(src_tmr), (u32)~GLTSYN_ENA_TSYN_ENA_M);
+
+ /* Save the current system time so the PHC time can be restored after reset */
+ ptp->reset_time = ktime_get_real_ns();
+}
+
+/**
+ * ice_ptp_init_owner - Initialize PTP_1588_CLOCK device
+ * @pf: Board private structure
+ *
+ * Set up and initialize a PTP clock device that represents the device hardware
+ * clock. Save the clock index for other functions connected to the same
+ * hardware resource.
+ */
+static int ice_ptp_init_owner(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ struct timespec64 ts;
+ int err, itr = 1;
+
+ err = ice_ptp_init_phc(hw);
+ if (err) {
+ dev_err(ice_pf_to_dev(pf), "Failed to initialize PHC, err %d\n",
+ err);
+ return err;
+ }
+
+ /* Acquire the global hardware lock */
+ if (!ice_ptp_lock(hw)) {
+ err = -EBUSY;
+ goto err_exit;
+ }
+
+ /* Write the increment time value to PHY and LAN */
+ err = ice_ptp_write_incval(hw, ice_base_incval(pf));
+ if (err) {
+ ice_ptp_unlock(hw);
+ goto err_exit;
+ }
+
+ ts = ktime_to_timespec64(ktime_get_real());
+ /* Write the initial Time value to PHY and LAN */
+ err = ice_ptp_write_init(pf, &ts);
+ if (err) {
+ ice_ptp_unlock(hw);
+ goto err_exit;
+ }
+
+ /* Release the global hardware lock */
+ ice_ptp_unlock(hw);
+
+ if (!ice_is_e810(hw)) {
+ /* Enable quad interrupts */
+ err = ice_ptp_tx_ena_intr(pf, true, itr);
+ if (err)
+ goto err_exit;
+ }
+
+ /* Ensure we have a clock device */
+ err = ice_ptp_create_clock(pf);
+ if (err)
+ goto err_clk;
+
+ /* Store the PTP clock index for other PFs */
+ ice_set_ptp_clock_index(pf);
+
+ return 0;
+
+err_clk:
+ pf->ptp.clock = NULL;
+err_exit:
+ return err;
+}
+
+/**
+ * ice_ptp_init_work - Initialize PTP work threads
+ * @pf: Board private structure
+ * @ptp: PF PTP structure
+ */
+static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp)
+{
+ struct kthread_worker *kworker;
+
+ /* Initialize work functions */
+ kthread_init_delayed_work(&ptp->work, ice_ptp_periodic_work);
+
+ /* Allocate a kworker for handling work required for the ports
+ * connected to the PTP hardware clock.
+ */
+ kworker = kthread_create_worker(0, "ice-ptp-%s",
+ dev_name(ice_pf_to_dev(pf)));
+ if (IS_ERR(kworker))
+ return PTR_ERR(kworker);
+
+ ptp->kworker = kworker;
+
+ /* Start periodic work going */
+ kthread_queue_delayed_work(ptp->kworker, &ptp->work, 0);
+
+ return 0;
+}
+
+/**
+ * ice_ptp_init_port - Initialize PTP port structure
+ * @pf: Board private structure
+ * @ptp_port: PTP port structure
+ */
+static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
+{
+ mutex_init(&ptp_port->ps_lock);
+
+ if (ice_is_e810(&pf->hw))
+ return ice_ptp_init_tx_e810(pf, &ptp_port->tx);
+
+ kthread_init_delayed_work(&ptp_port->ov_work,
+ ice_ptp_wait_for_offset_valid);
+ return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num);
+}
+
+/**
+ * ice_ptp_init - Initialize PTP hardware clock support
+ * @pf: Board private structure
+ *
+ * Set up the device for interacting with the PTP hardware clock for all
+ * functions, both the function that owns the clock hardware, and the
+ * functions connected to the clock hardware.
+ *
+ * The clock owner will allocate and register a ptp_clock with the
+ * PTP_1588_CLOCK infrastructure. All functions allocate a kthread and work
+ * items used for asynchronous work such as Tx timestamps and periodic work.
+ */
+void ice_ptp_init(struct ice_pf *pf)
+{
+ struct ice_ptp *ptp = &pf->ptp;
+ struct ice_hw *hw = &pf->hw;
+ int err;
+
+ /* If this function owns the clock hardware, it must allocate and
+ * configure the PTP clock device to represent it.
+ */
+ if (hw->func_caps.ts_func_info.src_tmr_owned) {
+ err = ice_ptp_init_owner(pf);
+ if (err)
+ goto err;
+ }
+
+ ptp->port.port_num = hw->pf_id;
+ err = ice_ptp_init_port(pf, &ptp->port);
+ if (err)
+ goto err;
+
+ /* Start the PHY timestamping block */
+ ice_ptp_reset_phy_timestamping(pf);
+
+ set_bit(ICE_FLAG_PTP, pf->flags);
+ err = ice_ptp_init_work(pf, ptp);
+ if (err)
+ goto err;
+
+ dev_info(ice_pf_to_dev(pf), "PTP init successful\n");
+ return;
+
+err:
+ /* If we registered a PTP clock, release it */
+ if (pf->ptp.clock) {
+ ptp_clock_unregister(ptp->clock);
+ pf->ptp.clock = NULL;
+ }
+ clear_bit(ICE_FLAG_PTP, pf->flags);
+ dev_err(ice_pf_to_dev(pf), "PTP init failed, err %d\n", err);
+}
+
+/**
+ * ice_ptp_release - Disable the driver/HW support and unregister the clock
+ * @pf: Board private structure
+ *
+ * This function handles the cleanup work required after initialization by
+ * clearing out the driver's PTP state and unregistering the clock.
+ */
+void ice_ptp_release(struct ice_pf *pf)
+{
+ if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ return;
+
+ /* Disable timestamping for both Tx and Rx */
+ ice_ptp_cfg_timestamp(pf, false);
+
+ ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
+
+ clear_bit(ICE_FLAG_PTP, pf->flags);
+
+ kthread_cancel_delayed_work_sync(&pf->ptp.work);
+
+ ice_ptp_port_phy_stop(&pf->ptp.port);
+ mutex_destroy(&pf->ptp.port.ps_lock);
+ if (pf->ptp.kworker) {
+ kthread_destroy_worker(pf->ptp.kworker);
+ pf->ptp.kworker = NULL;
+ }
+
+ if (!pf->ptp.clock)
+ return;
+
+ /* Disable periodic outputs */
+ ice_ptp_disable_all_clkout(pf);
+
+ ice_clear_ptp_clock_index(pf);
+ ptp_clock_unregister(pf->ptp.clock);
+ pf->ptp.clock = NULL;
+
+ dev_info(ice_pf_to_dev(pf), "Removed PTP clock\n");
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
new file mode 100644
index 000000000..e689c05bb
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_PTP_H_
+#define _ICE_PTP_H_
+
+#include <linux/ptp_clock_kernel.h>
+#include <linux/kthread.h>
+
+#include "ice_ptp_hw.h"
+
+enum ice_ptp_pin_e810 {
+ GPIO_20 = 0,
+ GPIO_21,
+ GPIO_22,
+ GPIO_23,
+ NUM_PTP_PIN_E810
+};
+
+enum ice_ptp_pin_e810t {
+ GNSS = 0,
+ SMA1,
+ UFL1,
+ SMA2,
+ UFL2,
+ NUM_PTP_PINS_E810T
+};
+
+struct ice_perout_channel {
+ bool ena;
+ u32 gpio_pin;
+ u64 period;
+ u64 start_time;
+};
+
+/* The ice hardware captures Tx hardware timestamps in the PHY. The timestamp
+ * is stored in a buffer of registers. Depending on the specific hardware,
+ * this buffer might be shared across multiple PHY ports.
+ *
+ * On transmit of a packet to be timestamped, software is responsible for
+ * selecting an open index. Hardware makes no attempt to lock or prevent
+ * re-use of an index for multiple packets.
+ *
+ * To handle this, timestamp indexes must be tracked by software to ensure
+ * that an index is not re-used for multiple transmitted packets. The
+ * structures and functions declared in this file track the available Tx
+ * register indexes, as well as provide storage for the SKB pointers.
+ *
+ * To allow multiple ports to access the shared register block independently,
+ * the blocks are split up so that indexes are assigned to each port based on
+ * hardware logical port number.
+ *
+ * The timestamp blocks are handled differently for E810- and E822-based
+ * devices. In E810 devices, each port has its own block of timestamps, while in
+ * E822 there is a need to logically break the block of registers into smaller
+ * chunks based on the port number to avoid collisions.
+ *
+ * Example for port 5 in E810:
+ * +--------+--------+--------+--------+--------+--------+--------+--------+
+ * |register|register|register|register|register|register|register|register|
+ * | block | block | block | block | block | block | block | block |
+ * | for | for | for | for | for | for | for | for |
+ * | port 0 | port 1 | port 2 | port 3 | port 4 | port 5 | port 6 | port 7 |
+ * +--------+--------+--------+--------+--------+--------+--------+--------+
+ * ^^
+ * ||
+ * |--- quad offset is always 0
+ * ---- quad number
+ *
+ * Example for port 5 in E822:
+ * +-----------------------------+-----------------------------+
+ * | register block for quad 0 | register block for quad 1 |
+ * |+------+------+------+------+|+------+------+------+------+|
+ * ||port 0|port 1|port 2|port 3|||port 0|port 1|port 2|port 3||
+ * |+------+------+------+------+|+------+------+------+------+|
+ * +-----------------------------+-------^---------------------+
+ * ^ |
+ * | --- quad offset*
+ * ---- quad number
+ *
+ * * PHY port 5 is port 1 in quad 1
+ *
+ */
+
+/**
+ * struct ice_tx_tstamp - Tracking for a single Tx timestamp
+ * @skb: pointer to the SKB for this timestamp request
+ * @start: jiffies when the timestamp was first requested
+ * @cached_tstamp: last read timestamp
+ *
+ * This structure tracks a single timestamp request. The SKB pointer is
+ * provided when initiating a request. The start time is used to ensure that
+ * we discard old requests that were not fulfilled within a 2 second time
+ * window.
+ * Timestamp values in the PHY are read only and do not get cleared except at
+ * hardware reset or when a new timestamp value is captured. The cached_tstamp
+ * field is used to detect the case where a new timestamp has not yet been
+ * captured, ensuring that we avoid sending stale timestamp data to the stack.
+ */
+struct ice_tx_tstamp {
+ struct sk_buff *skb;
+ unsigned long start;
+ u64 cached_tstamp;
+};
+
+/**
+ * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port
+ * @lock: lock to prevent concurrent write to in_use bitmap
+ * @tstamps: array of len entries for storing outstanding timestamp requests
+ * @in_use: bitmap of len bits indicating which slots are in use
+ * @quad: which quad the timestamps are captured in
+ * @quad_offset: offset into timestamp block of the quad to get the real index
+ * @len: length of the tstamps and in_use fields
+ * @init: if true, the tracker is initialized
+ * @calibrating: if true, the PHY is calibrating the Tx offset. During this
+ * window, timestamps are temporarily disabled.
+ */
+struct ice_ptp_tx {
+ spinlock_t lock; /* lock protecting in_use bitmap */
+ struct ice_tx_tstamp *tstamps;
+ unsigned long *in_use;
+ u8 quad;
+ u8 quad_offset;
+ u8 len;
+ u8 init;
+ u8 calibrating;
+};
+
+/* Quad and port information for initializing timestamp blocks */
+#define INDEX_PER_QUAD 64
+#define INDEX_PER_PORT (INDEX_PER_QUAD / ICE_PORTS_PER_QUAD)
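+
+/* For example (a sketch, not driver code): with 4 ports per quad, PHY port
+ * 5 on an E822 device lands in quad 1 as the second port of that quad, so
+ * its tracker would be initialized roughly as:
+ *
+ *	tx->quad = 5 / ICE_PORTS_PER_QUAD;			// quad 1
+ *	tx->quad_offset = (5 % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT; // 16
+ *	tx->len = INDEX_PER_PORT;				// 16 indexes
+ *
+ * while an E810 port owns a whole register block, with quad_offset = 0 and
+ * len = INDEX_PER_QUAD.
+ */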
+
+/**
+ * struct ice_ptp_port - data used to initialize an external port for PTP
+ *
+ * This structure contains data indicating whether a single external port is
+ * ready for PTP functionality. It is used to track the port initialization
+ * and determine when the port's PHY offset is valid.
+ *
+ * @tx: Tx timestamp tracking for this port
+ * @ov_work: delayed work task for tracking when PHY offset is valid
+ * @ps_lock: mutex used to protect the overall PTP PHY start procedure
+ * @link_up: indicates whether the link is up
+ * @tx_fifo_busy_cnt: number of times the Tx FIFO was busy
+ * @port_num: the port number this structure represents
+ */
+struct ice_ptp_port {
+ struct ice_ptp_tx tx;
+ struct kthread_delayed_work ov_work;
+ struct mutex ps_lock; /* protects overall PTP PHY start procedure */
+ bool link_up;
+ u8 tx_fifo_busy_cnt;
+ u8 port_num;
+};
+
+#define GLTSYN_TGT_H_IDX_MAX 4
+
+/**
+ * struct ice_ptp - data used for integrating with CONFIG_PTP_1588_CLOCK
+ * @port: data for the PHY port initialization procedure
+ * @work: delayed work function for periodic tasks
+ * @cached_phc_time: a cached copy of the PHC time for timestamp extension
+ * @cached_phc_jiffies: jiffies when cached_phc_time was last updated
+ * @ext_ts_chan: the external timestamp channel in use
+ * @ext_ts_irq: the external timestamp IRQ in use
+ * @kworker: kwork thread for handling periodic work
+ * @perout_channels: periodic output data
+ * @info: structure defining PTP hardware capabilities
+ * @clock: pointer to registered PTP clock device
+ * @tstamp_config: hardware timestamping configuration
+ * @reset_time: kernel time after clock stop on reset
+ * @tx_hwtstamp_skipped: number of Tx timestamp requests skipped
+ * @tx_hwtstamp_timeouts: number of Tx skbs discarded with no timestamp
+ * @tx_hwtstamp_flushed: number of Tx skbs flushed due to interface closed
+ * @tx_hwtstamp_discarded: number of Tx skbs discarded due to cached PHC time
+ * being too old to correctly extend timestamp
+ * @late_cached_phc_updates: number of times cached PHC update is late
+ */
+struct ice_ptp {
+ struct ice_ptp_port port;
+ struct kthread_delayed_work work;
+ u64 cached_phc_time;
+ unsigned long cached_phc_jiffies;
+ u8 ext_ts_chan;
+ u8 ext_ts_irq;
+ struct kthread_worker *kworker;
+ struct ice_perout_channel perout_channels[GLTSYN_TGT_H_IDX_MAX];
+ struct ptp_clock_info info;
+ struct ptp_clock *clock;
+ struct hwtstamp_config tstamp_config;
+ u64 reset_time;
+ u32 tx_hwtstamp_skipped;
+ u32 tx_hwtstamp_timeouts;
+ u32 tx_hwtstamp_flushed;
+ u32 tx_hwtstamp_discarded;
+ u32 late_cached_phc_updates;
+};
+
+#define __ptp_port_to_ptp(p) \
+ container_of((p), struct ice_ptp, port)
+#define ptp_port_to_pf(p) \
+ container_of(__ptp_port_to_ptp((p)), struct ice_pf, ptp)
+
+#define __ptp_info_to_ptp(i) \
+ container_of((i), struct ice_ptp, info)
+#define ptp_info_to_pf(i) \
+ container_of(__ptp_info_to_ptp((i)), struct ice_pf, ptp)
+
+#define PFTSYN_SEM_BYTES 4
+#define PTP_SHARED_CLK_IDX_VALID BIT(31)
+#define TS_CMD_MASK 0xF
+#define SYNC_EXEC_CMD 0x3
+#define ICE_PTP_TS_VALID BIT(0)
+
+#define FIFO_EMPTY BIT(2)
+#define FIFO_OK 0xFF
+#define ICE_PTP_FIFO_NUM_CHECKS 5
+/* Per-channel register definitions */
+#define GLTSYN_AUX_OUT(_chan, _idx) (GLTSYN_AUX_OUT_0(_idx) + ((_chan) * 8))
+#define GLTSYN_AUX_IN(_chan, _idx) (GLTSYN_AUX_IN_0(_idx) + ((_chan) * 8))
+#define GLTSYN_CLKO(_chan, _idx) (GLTSYN_CLKO_0(_idx) + ((_chan) * 8))
+#define GLTSYN_TGT_L(_chan, _idx) (GLTSYN_TGT_L_0(_idx) + ((_chan) * 16))
+#define GLTSYN_TGT_H(_chan, _idx) (GLTSYN_TGT_H_0(_idx) + ((_chan) * 16))
+#define GLTSYN_EVNT_L(_chan, _idx) (GLTSYN_EVNT_L_0(_idx) + ((_chan) * 16))
+#define GLTSYN_EVNT_H(_chan, _idx) (GLTSYN_EVNT_H_0(_idx) + ((_chan) * 16))
+#define GLTSYN_EVNT_H_IDX_MAX 3
+
+/* Pin definitions for PTP PPS out */
+#define PPS_CLK_GEN_CHAN 3
+#define PPS_CLK_SRC_CHAN 2
+#define PPS_PIN_INDEX 5
+#define TIME_SYNC_PIN_INDEX 4
+#define N_EXT_TS_E810 3
+#define N_PER_OUT_E810 4
+#define N_PER_OUT_E810T 3
+#define N_PER_OUT_NO_SMA_E810T 2
+#define N_EXT_TS_NO_SMA_E810T 2
+#define ETH_GLTSYN_ENA(_i) (0x03000348 + ((_i) * 4))
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+struct ice_pf;
+int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr);
+int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr);
+void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena);
+int ice_get_ptp_clock_index(struct ice_pf *pf);
+
+void ice_ptp_extts_event(struct ice_pf *pf);
+s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb);
+bool ice_ptp_process_ts(struct ice_pf *pf);
+
+void
+ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb);
+void ice_ptp_reset(struct ice_pf *pf);
+void ice_ptp_prepare_for_reset(struct ice_pf *pf);
+void ice_ptp_init(struct ice_pf *pf);
+void ice_ptp_release(struct ice_pf *pf);
+int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup);
+#else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) { }
+static inline int ice_get_ptp_clock_index(struct ice_pf *pf)
+{
+ return -1;
+}
+
+static inline void ice_ptp_extts_event(struct ice_pf *pf) { }
+static inline s8
+ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
+{
+ return -1;
+}
+
+static inline bool ice_ptp_process_ts(struct ice_pf *pf)
+{
+ return true;
+}
+static inline void
+ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { }
+static inline void ice_ptp_reset(struct ice_pf *pf) { }
+static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { }
+static inline void ice_ptp_init(struct ice_pf *pf) { }
+static inline void ice_ptp_release(struct ice_pf *pf) { }
+static inline int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
+{ return 0; }
+#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+#endif /* _ICE_PTP_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h
new file mode 100644
index 000000000..4109aa3b2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h
@@ -0,0 +1,374 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_PTP_CONSTS_H_
+#define _ICE_PTP_CONSTS_H_
+
+/* Constant definitions related to the hardware clock used for PTP 1588
+ * features and functionality.
+ */
+/* Constants defined for the PTP 1588 clock hardware. */
+
+/* struct ice_time_ref_info_e822
+ *
+ * E822 hardware can use different sources as the reference for the PTP
+ * hardware clock. Each clock has different characteristics such as a slightly
+ * different frequency, etc.
+ *
+ * This lookup table defines several constants that depend on the current time
+ * reference. See the struct ice_time_ref_info_e822 for information about the
+ * meaning of each constant.
+ */
+const struct ice_time_ref_info_e822 e822_time_ref[NUM_ICE_TIME_REF_FREQ] = {
+ /* ICE_TIME_REF_FREQ_25_000 -> 25 MHz */
+ {
+ /* pll_freq */
+ 823437500, /* 823.4375 MHz PLL */
+ /* nominal_incval */
+ 0x136e44fabULL,
+ /* pps_delay */
+ 11,
+ },
+
+ /* ICE_TIME_REF_FREQ_122_880 -> 122.88 MHz */
+ {
+ /* pll_freq */
+ 783360000, /* 783.36 MHz */
+ /* nominal_incval */
+ 0x146cc2177ULL,
+ /* pps_delay */
+ 12,
+ },
+
+ /* ICE_TIME_REF_FREQ_125_000 -> 125 MHz */
+ {
+ /* pll_freq */
+ 796875000, /* 796.875 MHz */
+ /* nominal_incval */
+ 0x141414141ULL,
+ /* pps_delay */
+ 12,
+ },
+
+ /* ICE_TIME_REF_FREQ_153_600 -> 153.6 MHz */
+ {
+ /* pll_freq */
+ 816000000, /* 816 MHz */
+ /* nominal_incval */
+ 0x139b9b9baULL,
+ /* pps_delay */
+ 12,
+ },
+
+ /* ICE_TIME_REF_FREQ_156_250 -> 156.25 MHz */
+ {
+ /* pll_freq */
+ 830078125, /* 830.078125 MHz */
+ /* nominal_incval */
+ 0x134679aceULL,
+ /* pps_delay */
+ 11,
+ },
+
+ /* ICE_TIME_REF_FREQ_245_760 -> 245.76 MHz */
+ {
+ /* pll_freq */
+ 783360000, /* 783.36 MHz */
+ /* nominal_incval */
+ 0x146cc2177ULL,
+ /* pps_delay */
+ 12,
+ },
+};
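+
+/* Sanity check (illustrative, not driver code): nominal_incval is the clock
+ * period in nanoseconds expressed as a 32.32 fixed-point value, i.e.
+ * approximately (10^9 << 32) / pll_freq. For the 25 MHz TIME_REF entry:
+ *
+ *	(1000000000ULL << 32) / 823437500 ~= 0x136e44fab
+ *
+ * matching the table above (the stored value is rounded to nearest rather
+ * than truncated).
+ */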
+
+const struct ice_cgu_pll_params_e822 e822_cgu_params[NUM_ICE_TIME_REF_FREQ] = {
+ /* ICE_TIME_REF_FREQ_25_000 -> 25 MHz */
+ {
+ /* refclk_pre_div */
+ 1,
+ /* feedback_div */
+ 197,
+ /* frac_n_div */
+ 2621440,
+ /* post_pll_div */
+ 6,
+ },
+
+ /* ICE_TIME_REF_FREQ_122_880 -> 122.88 MHz */
+ {
+ /* refclk_pre_div */
+ 5,
+ /* feedback_div */
+ 223,
+ /* frac_n_div */
+ 524288,
+ /* post_pll_div */
+ 7,
+ },
+
+ /* ICE_TIME_REF_FREQ_125_000 -> 125 MHz */
+ {
+ /* refclk_pre_div */
+ 5,
+ /* feedback_div */
+ 223,
+ /* frac_n_div */
+ 524288,
+ /* post_pll_div */
+ 7,
+ },
+
+ /* ICE_TIME_REF_FREQ_153_600 -> 153.6 MHz */
+ {
+ /* refclk_pre_div */
+ 5,
+ /* feedback_div */
+ 159,
+ /* frac_n_div */
+ 1572864,
+ /* post_pll_div */
+ 6,
+ },
+
+ /* ICE_TIME_REF_FREQ_156_250 -> 156.25 MHz */
+ {
+ /* refclk_pre_div */
+ 5,
+ /* feedback_div */
+ 159,
+ /* frac_n_div */
+ 1572864,
+ /* post_pll_div */
+ 6,
+ },
+
+ /* ICE_TIME_REF_FREQ_245_760 -> 245.76 MHz */
+ {
+ /* refclk_pre_div */
+ 10,
+ /* feedback_div */
+ 223,
+ /* frac_n_div */
+ 524288,
+ /* post_pll_div */
+ 7,
+ },
+};
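+
+/* The parameters above are consistent with the relation (an observation
+ * from the table values, not a documented formula):
+ *
+ *	pll_freq = (time_ref / refclk_pre_div)
+ *		   * (feedback_div + frac_n_div / 2^22)
+ *		   / post_pll_div
+ *
+ * e.g. for the 122.88 MHz TIME_REF: 122.88 MHz / 5 * (223 + 524288 / 2^22)
+ * / 7 = 24.576 MHz * 223.125 / 7 = 783.36 MHz, matching e822_time_ref.
+ */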
+
+/* struct ice_vernier_info_e822
+ *
+ * E822 hardware calibrates the delay of the timestamp indication from the
+ * actual packet transmission or reception during the initialization of the
+ * PHY. To do this, the hardware mechanism uses some conversions between the
+ * various clocks within the PHY block. This table defines constants used to
+ * calculate the correct conversion ratios in the PHY registers.
+ *
+ * Many of the values relate to the PAR/PCS clock conversion registers. For
+ * these registers, a value of 0 means that the associated register is not
+ * used by this link speed, and that the register should be cleared by writing
+ * 0. Other values specify the clock frequency in Hz.
+ */
+const struct ice_vernier_info_e822 e822_vernier[NUM_ICE_PTP_LNK_SPD] = {
+ /* ICE_PTP_LNK_SPD_1G */
+ {
+ /* tx_par_clk */
+ 31250000, /* 31.25 MHz */
+ /* rx_par_clk */
+ 31250000, /* 31.25 MHz */
+ /* tx_pcs_clk */
+ 125000000, /* 125 MHz */
+ /* rx_pcs_clk */
+ 125000000, /* 125 MHz */
+ /* tx_desk_rsgb_par */
+ 0, /* unused */
+ /* rx_desk_rsgb_par */
+ 0, /* unused */
+ /* tx_desk_rsgb_pcs */
+ 0, /* unused */
+ /* rx_desk_rsgb_pcs */
+ 0, /* unused */
+ /* tx_fixed_delay */
+ 25140,
+ /* pmd_adj_divisor */
+ 10000000,
+ /* rx_fixed_delay */
+ 17372,
+ },
+ /* ICE_PTP_LNK_SPD_10G */
+ {
+ /* tx_par_clk */
+ 257812500, /* 257.8125 MHz */
+ /* rx_par_clk */
+ 257812500, /* 257.8125 MHz */
+ /* tx_pcs_clk */
+ 156250000, /* 156.25 MHz */
+ /* rx_pcs_clk */
+ 156250000, /* 156.25 MHz */
+ /* tx_desk_rsgb_par */
+ 0, /* unused */
+ /* rx_desk_rsgb_par */
+ 0, /* unused */
+ /* tx_desk_rsgb_pcs */
+ 0, /* unused */
+ /* rx_desk_rsgb_pcs */
+ 0, /* unused */
+ /* tx_fixed_delay */
+ 6938,
+ /* pmd_adj_divisor */
+ 82500000,
+ /* rx_fixed_delay */
+ 6212,
+ },
+ /* ICE_PTP_LNK_SPD_25G */
+ {
+ /* tx_par_clk */
+ 644531250, /* 644.53125 MHz */
+ /* rx_par_clk */
+ 644531250, /* 644.53125 MHz */
+ /* tx_pcs_clk */
+ 390625000, /* 390.625 MHz */
+ /* rx_pcs_clk */
+ 390625000, /* 390.625 MHz */
+ /* tx_desk_rsgb_par */
+ 0, /* unused */
+ /* rx_desk_rsgb_par */
+ 0, /* unused */
+ /* tx_desk_rsgb_pcs */
+ 0, /* unused */
+ /* rx_desk_rsgb_pcs */
+ 0, /* unused */
+ /* tx_fixed_delay */
+ 2778,
+ /* pmd_adj_divisor */
+ 206250000,
+ /* rx_fixed_delay */
+ 2491,
+ },
+ /* ICE_PTP_LNK_SPD_25G_RS */
+ {
+ /* tx_par_clk */
+ 0, /* unused */
+ /* rx_par_clk */
+ 0, /* unused */
+ /* tx_pcs_clk */
+ 0, /* unused */
+ /* rx_pcs_clk */
+ 0, /* unused */
+ /* tx_desk_rsgb_par */
+ 161132812, /* 161.1328125 MHz Reed Solomon gearbox */
+ /* rx_desk_rsgb_par */
+ 161132812, /* 161.1328125 MHz Reed Solomon gearbox */
+ /* tx_desk_rsgb_pcs */
+ 97656250, /* 97.65625 MHz Reed Solomon gearbox */
+ /* rx_desk_rsgb_pcs */
+ 97656250, /* 97.65625 MHz Reed Solomon gearbox */
+ /* tx_fixed_delay */
+ 3928,
+ /* pmd_adj_divisor */
+ 206250000,
+ /* rx_fixed_delay */
+ 29535,
+ },
+ /* ICE_PTP_LNK_SPD_40G */
+ {
+ /* tx_par_clk */
+ 257812500,
+ /* rx_par_clk */
+ 257812500,
+ /* tx_pcs_clk */
+ 156250000, /* 156.25 MHz */
+ /* rx_pcs_clk */
+ 156250000, /* 156.25 MHz */
+ /* tx_desk_rsgb_par */
+ 0, /* unused */
+ /* rx_desk_rsgb_par */
+ 156250000, /* 156.25 MHz deskew clock */
+ /* tx_desk_rsgb_pcs */
+ 0, /* unused */
+ /* rx_desk_rsgb_pcs */
+ 156250000, /* 156.25 MHz deskew clock */
+ /* tx_fixed_delay */
+ 5666,
+ /* pmd_adj_divisor */
+ 82500000,
+ /* rx_fixed_delay */
+ 4244,
+ },
+ /* ICE_PTP_LNK_SPD_50G */
+ {
+ /* tx_par_clk */
+ 644531250, /* 644.53125 MHz */
+ /* rx_par_clk */
+ 644531250, /* 644.53125 MHz */
+ /* tx_pcs_clk */
+ 390625000, /* 390.625 MHz */
+ /* rx_pcs_clk */
+ 390625000, /* 390.625 MHz */
+ /* tx_desk_rsgb_par */
+ 0, /* unused */
+ /* rx_desk_rsgb_par */
+ 195312500, /* 195.3125 MHz deskew clock */
+ /* tx_desk_rsgb_pcs */
+ 0, /* unused */
+ /* rx_desk_rsgb_pcs */
+ 195312500, /* 195.3125 MHz deskew clock */
+ /* tx_fixed_delay */
+ 2778,
+ /* pmd_adj_divisor */
+ 206250000,
+ /* rx_fixed_delay */
+ 2868,
+ },
+ /* ICE_PTP_LNK_SPD_50G_RS */
+ {
+ /* tx_par_clk */
+ 0, /* unused */
+ /* rx_par_clk */
+ 644531250, /* 644.53125 MHz */
+ /* tx_pcs_clk */
+ 0, /* unused */
+ /* rx_pcs_clk */
+ 644531250, /* 644.53125 MHz */
+ /* tx_desk_rsgb_par */
+ 322265625, /* 322.265625 MHz Reed Solomon gearbox */
+ /* rx_desk_rsgb_par */
+ 322265625, /* 322.265625 MHz Reed Solomon gearbox */
+ /* tx_desk_rsgb_pcs */
+ 644531250, /* 644.53125 MHz Reed Solomon gearbox */
+ /* rx_desk_rsgb_pcs */
+ 644531250, /* 644.53125 MHz Reed Solomon gearbox */
+ /* tx_fixed_delay */
+ 2095,
+ /* pmd_adj_divisor */
+ 206250000,
+ /* rx_fixed_delay */
+ 14524,
+ },
+ /* ICE_PTP_LNK_SPD_100G_RS */
+ {
+ /* tx_par_clk */
+ 0, /* unused */
+ /* rx_par_clk */
+ 644531250, /* 644.53125 MHz */
+ /* tx_pcs_clk */
+ 0, /* unused */
+ /* rx_pcs_clk */
+ 644531250, /* 644.53125 MHz */
+ /* tx_desk_rsgb_par */
+ 644531250, /* 644.53125 MHz Reed Solomon gearbox */
+ /* rx_desk_rsgb_par */
+ 644531250, /* 644.53125 MHz Reed Solomon gearbox */
+ /* tx_desk_rsgb_pcs */
+ 644531250, /* 644.53125 MHz Reed Solomon gearbox */
+ /* rx_desk_rsgb_pcs */
+ 644531250, /* 644.53125 MHz Reed Solomon gearbox */
+ /* tx_fixed_delay */
+ 1620,
+ /* pmd_adj_divisor */
+ 206250000,
+ /* rx_fixed_delay */
+ 7775,
+ },
+};
+
+#endif /* _ICE_PTP_CONSTS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
new file mode 100644
index 000000000..813acd6a4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -0,0 +1,3444 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include <linux/delay.h>
+#include "ice_common.h"
+#include "ice_ptp_hw.h"
+#include "ice_ptp_consts.h"
+#include "ice_cgu_regs.h"
+
+/* Low level functions for interacting with and managing the device clock used
+ * for the Precision Time Protocol.
+ *
+ * The ice hardware represents the current time using three registers:
+ *
+ * GLTSYN_TIME_H GLTSYN_TIME_L GLTSYN_TIME_R
+ * +---------------+ +---------------+ +---------------+
+ * | 32 bits | | 32 bits | | 32 bits |
+ * +---------------+ +---------------+ +---------------+
+ *
+ * The registers are incremented every clock tick using a 40bit increment
+ * value defined over two registers:
+ *
+ * GLTSYN_INCVAL_H GLTSYN_INCVAL_L
+ * +---------------+ +---------------+
+ * | 8 bits | | 32 bits |
+ * +---------------+ +---------------+
+ *
+ * The increment value is added to the GLTSYN_TIME_R and GLTSYN_TIME_L
+ * registers every clock source tick. Depending on the specific device
+ * configuration, the clock source frequency could be one of a number of
+ * values.
+ *
+ * For E810 devices, the increment frequency is 812.5 MHz
+ *
+ * For E822 devices the clock can be derived from different sources, and the
+ * increment has an effective frequency of one of the following:
+ * - 823.4375 MHz
+ * - 783.36 MHz
+ * - 796.875 MHz
+ * - 816 MHz
+ * - 830.078125 MHz
+ * - 783.36 MHz
+ *
+ * The hardware captures timestamps in the PHY for incoming packets, and for
+ * outgoing packets on request. To support this, the PHY maintains a timer
+ * that matches the lower 64 bits of the global source timer.
+ *
+ * In order to ensure that the PHY timers and the source timer are equivalent,
+ * shadow registers are used to prepare the desired initial values. A special
+ * sync command is issued to trigger copying from the shadow registers into
+ * the appropriate source and PHY registers simultaneously.
+ *
+ * The driver supports devices which have different PHYs with subtly different
+ * mechanisms to program and control the timers. We divide the devices into
+ * families named after the first major device, E810 and similar devices, and
+ * E822 and similar devices.
+ *
+ * - E822 based devices have additional support for fine grained Vernier
+ * calibration which requires significant setup
+ * - The layout of timestamp data in the PHY register blocks is different
+ * - The way timer synchronization commands are issued is different.
+ *
+ * To support this, very low level functions have an e810 or e822 suffix
+ * indicating what type of device they work on. Higher level abstractions for
+ * tasks that can be done on both devices do not have the suffix and will
+ * correctly look up the appropriate low level function when running.
+ *
+ * Functions which only make sense on a single device family may not have
+ * a suitable generic implementation.
+ */
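+
+/* A rough sketch (hypothetical; the driver's real read path also issues a
+ * READ_TIME command so that the L and H halves latch atomically) of
+ * assembling the 64 bit source time from the split registers:
+ *
+ *	u32 lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
+ *	u32 hi = rd32(hw, GLTSYN_TIME_H(tmr_idx));
+ *	u64 now = ((u64)hi << 32) | lo;
+ */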
+
+/**
+ * ice_get_ptp_src_clock_index - determine source clock index
+ * @hw: pointer to HW struct
+ *
+ * Determine the source clock index currently in use, based on device
+ * capabilities reported during initialization.
+ */
+u8 ice_get_ptp_src_clock_index(struct ice_hw *hw)
+{
+ return hw->func_caps.ts_func_info.tmr_index_assoc;
+}
+
+/**
+ * ice_ptp_read_src_incval - Read source timer increment value
+ * @hw: pointer to HW struct
+ *
+ * Read the increment value of the source timer and return it.
+ */
+static u64 ice_ptp_read_src_incval(struct ice_hw *hw)
+{
+ u32 lo, hi;
+ u8 tmr_idx;
+
+ tmr_idx = ice_get_ptp_src_clock_index(hw);
+
+ lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx));
+ hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx));
+
+ return ((u64)(hi & INCVAL_HIGH_M) << 32) | lo;
+}
+
+/**
+ * ice_ptp_src_cmd - Prepare source timer for a timer command
+ * @hw: pointer to HW structure
+ * @cmd: Timer command
+ *
+ * Prepare the source timer for an upcoming timer sync command.
+ */
+static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+ u32 cmd_val;
+ u8 tmr_idx;
+
+ tmr_idx = ice_get_ptp_src_clock_index(hw);
+ cmd_val = tmr_idx << SEL_CPK_SRC;
+
+ switch (cmd) {
+ case INIT_TIME:
+ cmd_val |= GLTSYN_CMD_INIT_TIME;
+ break;
+ case INIT_INCVAL:
+ cmd_val |= GLTSYN_CMD_INIT_INCVAL;
+ break;
+ case ADJ_TIME:
+ cmd_val |= GLTSYN_CMD_ADJ_TIME;
+ break;
+ case ADJ_TIME_AT_TIME:
+ cmd_val |= GLTSYN_CMD_ADJ_INIT_TIME;
+ break;
+ case READ_TIME:
+ cmd_val |= GLTSYN_CMD_READ_TIME;
+ break;
+ case ICE_PTP_NOP:
+ break;
+ }
+
+ wr32(hw, GLTSYN_CMD, cmd_val);
+}
+
+/**
+ * ice_ptp_exec_tmr_cmd - Execute all prepared timer commands
+ * @hw: pointer to HW struct
+ *
+ * Write the SYNC_EXEC_CMD bit to the GLTSYN_CMD_SYNC register, and flush the
+ * write immediately. This triggers the hardware to begin executing all of the
+ * source and PHY timer commands synchronously.
+ */
+static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw)
+{
+ wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD);
+ ice_flush(hw);
+}
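+
+/* Typical two-phase usage (a sketch; the actual call sites live in the
+ * higher level init and adjust paths): arm the shadow registers first,
+ * then trigger one synchronous copy across source and PHY timers:
+ *
+ *	ice_ptp_src_cmd(hw, INIT_TIME);		// prepare the source timer
+ *	// ...prepare each PHY port for INIT_TIME as well...
+ *	ice_ptp_exec_tmr_cmd(hw);		// latch everything at once
+ */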
+
+/* E822 family functions
+ *
+ * The following functions operate on the E822 family of devices.
+ */
+
+/**
+ * ice_fill_phy_msg_e822 - Fill message data for a PHY register access
+ * @msg: the PHY message buffer to fill in
+ * @port: the port to access
+ * @offset: the register offset
+ */
+static void
+ice_fill_phy_msg_e822(struct ice_sbq_msg_input *msg, u8 port, u16 offset)
+{
+ int phy_port, phy, quadtype;
+
+ phy_port = port % ICE_PORTS_PER_PHY;
+ phy = port / ICE_PORTS_PER_PHY;
+ quadtype = (port / ICE_PORTS_PER_QUAD) % ICE_NUM_QUAD_TYPE;
+
+ if (quadtype == 0) {
+ msg->msg_addr_low = P_Q0_L(P_0_BASE + offset, phy_port);
+ msg->msg_addr_high = P_Q0_H(P_0_BASE + offset, phy_port);
+ } else {
+ msg->msg_addr_low = P_Q1_L(P_4_BASE + offset, phy_port);
+ msg->msg_addr_high = P_Q1_H(P_4_BASE + offset, phy_port);
+ }
+
+ if (phy == 0)
+ msg->dest_dev = rmn_0;
+ else if (phy == 1)
+ msg->dest_dev = rmn_1;
+ else
+ msg->dest_dev = rmn_2;
+}
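+
+/* Worked example (assuming the E822 layout of 8 ports per PHY and 4 ports
+ * per quad): port 5 decomposes as phy_port 5, phy 0, quadtype 1, so the
+ * message addresses the P_4_BASE register block on device rmn_0.
+ */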
+
+/**
+ * ice_is_64b_phy_reg_e822 - Check if this is a 64bit PHY register
+ * @low_addr: the low address to check
+ * @high_addr: on return, contains the high address of the 64bit register
+ *
+ * Checks if the provided low address is one of the known 64bit PHY values
+ * represented as two 32bit registers. If it is, return the appropriate high
+ * register offset to use.
+ */
+static bool ice_is_64b_phy_reg_e822(u16 low_addr, u16 *high_addr)
+{
+ switch (low_addr) {
+ case P_REG_PAR_PCS_TX_OFFSET_L:
+ *high_addr = P_REG_PAR_PCS_TX_OFFSET_U;
+ return true;
+ case P_REG_PAR_PCS_RX_OFFSET_L:
+ *high_addr = P_REG_PAR_PCS_RX_OFFSET_U;
+ return true;
+ case P_REG_PAR_TX_TIME_L:
+ *high_addr = P_REG_PAR_TX_TIME_U;
+ return true;
+ case P_REG_PAR_RX_TIME_L:
+ *high_addr = P_REG_PAR_RX_TIME_U;
+ return true;
+ case P_REG_TOTAL_TX_OFFSET_L:
+ *high_addr = P_REG_TOTAL_TX_OFFSET_U;
+ return true;
+ case P_REG_TOTAL_RX_OFFSET_L:
+ *high_addr = P_REG_TOTAL_RX_OFFSET_U;
+ return true;
+ case P_REG_UIX66_10G_40G_L:
+ *high_addr = P_REG_UIX66_10G_40G_U;
+ return true;
+ case P_REG_UIX66_25G_100G_L:
+ *high_addr = P_REG_UIX66_25G_100G_U;
+ return true;
+ case P_REG_TX_CAPTURE_L:
+ *high_addr = P_REG_TX_CAPTURE_U;
+ return true;
+ case P_REG_RX_CAPTURE_L:
+ *high_addr = P_REG_RX_CAPTURE_U;
+ return true;
+ case P_REG_TX_TIMER_INC_PRE_L:
+ *high_addr = P_REG_TX_TIMER_INC_PRE_U;
+ return true;
+ case P_REG_RX_TIMER_INC_PRE_L:
+ *high_addr = P_REG_RX_TIMER_INC_PRE_U;
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * ice_is_40b_phy_reg_e822 - Check if this is a 40bit PHY register
+ * @low_addr: the low address to check
+ * @high_addr: on return, contains the high address of the 40bit value
+ *
+ * Checks if the provided low address is one of the known 40bit PHY values
+ * split into two registers with the lower 8 bits in the low register and the
+ * upper 32 bits in the high register. If it is, return the appropriate high
+ * register offset to use.
+ */
+static bool ice_is_40b_phy_reg_e822(u16 low_addr, u16 *high_addr)
+{
+ switch (low_addr) {
+ case P_REG_TIMETUS_L:
+ *high_addr = P_REG_TIMETUS_U;
+ return true;
+ case P_REG_PAR_RX_TUS_L:
+ *high_addr = P_REG_PAR_RX_TUS_U;
+ return true;
+ case P_REG_PAR_TX_TUS_L:
+ *high_addr = P_REG_PAR_TX_TUS_U;
+ return true;
+ case P_REG_PCS_RX_TUS_L:
+ *high_addr = P_REG_PCS_RX_TUS_U;
+ return true;
+ case P_REG_PCS_TX_TUS_L:
+ *high_addr = P_REG_PCS_TX_TUS_U;
+ return true;
+ case P_REG_DESK_PAR_RX_TUS_L:
+ *high_addr = P_REG_DESK_PAR_RX_TUS_U;
+ return true;
+ case P_REG_DESK_PAR_TX_TUS_L:
+ *high_addr = P_REG_DESK_PAR_TX_TUS_U;
+ return true;
+ case P_REG_DESK_PCS_RX_TUS_L:
+ *high_addr = P_REG_DESK_PCS_RX_TUS_U;
+ return true;
+ case P_REG_DESK_PCS_TX_TUS_L:
+ *high_addr = P_REG_DESK_PCS_TX_TUS_U;
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * ice_read_phy_reg_e822 - Read a PHY register
+ * @hw: pointer to the HW struct
+ * @port: PHY port to read from
+ * @offset: PHY register offset to read
+ * @val: on return, the contents read from the PHY
+ *
+ * Read a PHY register for the given port over the device sideband queue.
+ */
+int
+ice_read_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 *val)
+{
+ struct ice_sbq_msg_input msg = {0};
+ int err;
+
+ ice_fill_phy_msg_e822(&msg, port, offset);
+ msg.opcode = ice_sbq_msg_rd;
+
+ err = ice_sbq_rw_reg(hw, &msg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+ err);
+ return err;
+ }
+
+ *val = msg.data;
+
+ return 0;
+}
+
+/**
+ * ice_read_64b_phy_reg_e822 - Read a 64bit value from PHY registers
+ * @hw: pointer to the HW struct
+ * @port: PHY port to read from
+ * @low_addr: offset of the lower register to read from
+ * @val: on return, the contents of the 64bit value from the PHY registers
+ *
+ * Reads the two registers associated with a 64bit value and returns it in the
+ * val pointer. The offset always specifies the lower register offset to use.
+ * The high offset is looked up. This function only operates on registers
+ * known to be two parts of a 64bit value.
+ */
+static int
+ice_read_64b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 *val)
+{
+ u32 low, high;
+ u16 high_addr;
+ int err;
+
+ /* Only operate on registers known to be split into two 32bit
+ * registers.
+ */
+ if (!ice_is_64b_phy_reg_e822(low_addr, &high_addr)) {
+ ice_debug(hw, ICE_DBG_PTP, "Invalid 64b register addr 0x%08x\n",
+ low_addr);
+ return -EINVAL;
+ }
+
+ err = ice_read_phy_reg_e822(hw, port, low_addr, &low);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read from low register 0x%08x\n, err %d",
+ low_addr, err);
+ return err;
+ }
+
+ err = ice_read_phy_reg_e822(hw, port, high_addr, &high);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read from high register 0x%08x\n, err %d",
+ high_addr, err);
+ return err;
+ }
+
+ *val = (u64)high << 32 | low;
+
+ return 0;
+}
+
+/**
+ * ice_write_phy_reg_e822 - Write a PHY register
+ * @hw: pointer to the HW struct
+ * @port: PHY port to write to
+ * @offset: PHY register offset to write
+ * @val: The value to write to the register
+ *
+ * Write a PHY register for the given port over the device sideband queue.
+ */
+int
+ice_write_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 val)
+{
+ struct ice_sbq_msg_input msg = {0};
+ int err;
+
+ ice_fill_phy_msg_e822(&msg, port, offset);
+ msg.opcode = ice_sbq_msg_wr;
+ msg.data = val;
+
+ err = ice_sbq_rw_reg(hw, &msg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_write_40b_phy_reg_e822 - Write a 40b value to the PHY
+ * @hw: pointer to the HW struct
+ * @port: port to write to
+ * @low_addr: offset of the low register
+ * @val: 40b value to write
+ *
+ * Write the provided 40b value to the two associated registers by splitting
+ * it up into two chunks, the lower 8 bits and the upper 32 bits.
+ */
+static int
+ice_write_40b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 val)
+{
+ u32 low, high;
+ u16 high_addr;
+ int err;
+
+ /* Only operate on registers known to be split into a lower 8 bit
+ * register and an upper 32 bit register.
+ */
+ if (!ice_is_40b_phy_reg_e822(low_addr, &high_addr)) {
+ ice_debug(hw, ICE_DBG_PTP, "Invalid 40b register addr 0x%08x\n",
+ low_addr);
+ return -EINVAL;
+ }
+
+ low = (u32)(val & P_REG_40B_LOW_M);
+ high = (u32)(val >> P_REG_40B_HIGH_S);
+
+ err = ice_write_phy_reg_e822(hw, port, low_addr, low);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write to low register 0x%08x\n, err %d",
+ low_addr, err);
+ return err;
+ }
+
+ err = ice_write_phy_reg_e822(hw, port, high_addr, high);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write to high register 0x%08x\n, err %d",
+ high_addr, err);
+ return err;
+ }
+
+ return 0;
+}
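+
+/* For instance, writing the 40b value 0x123456789A would place 0x9A in the
+ * low register and 0x12345678 in the high register, assuming the usual
+ * 8/32 split encoded by P_REG_40B_LOW_M and P_REG_40B_HIGH_S.
+ */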
+
+/**
+ * ice_write_64b_phy_reg_e822 - Write a 64bit value to PHY registers
+ * @hw: pointer to the HW struct
+ * @port: PHY port to read from
+ * @low_addr: offset of the lower register to read from
+ * @val: the contents of the 64bit value to write to PHY
+ *
+ * Write the 64bit value to the two associated 32bit PHY registers. The offset
+ * is always specified as the lower register, and the high address is looked
+ * up. This function only operates on registers known to be two parts of
+ * a 64bit value.
+ */
+static int
+ice_write_64b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 val)
+{
+ u32 low, high;
+ u16 high_addr;
+ int err;
+
+ /* Only operate on registers known to be split into two 32bit
+ * registers.
+ */
+ if (!ice_is_64b_phy_reg_e822(low_addr, &high_addr)) {
+ ice_debug(hw, ICE_DBG_PTP, "Invalid 64b register addr 0x%08x\n",
+ low_addr);
+ return -EINVAL;
+ }
+
+ low = lower_32_bits(val);
+ high = upper_32_bits(val);
+
+ err = ice_write_phy_reg_e822(hw, port, low_addr, low);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write to low register 0x%08x\n, err %d",
+ low_addr, err);
+ return err;
+ }
+
+ err = ice_write_phy_reg_e822(hw, port, high_addr, high);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write to high register 0x%08x\n, err %d",
+ high_addr, err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_fill_quad_msg_e822 - Fill message data for quad register access
+ * @msg: the PHY message buffer to fill in
+ * @quad: the quad to access
+ * @offset: the register offset
+ *
+ * Fill a message buffer for accessing a register in a quad shared between
+ * multiple PHYs.
+ */
+static void
+ice_fill_quad_msg_e822(struct ice_sbq_msg_input *msg, u8 quad, u16 offset)
+{
+ u32 addr;
+
+ msg->dest_dev = rmn_0;
+
+ if ((quad % ICE_NUM_QUAD_TYPE) == 0)
+ addr = Q_0_BASE + offset;
+ else
+ addr = Q_1_BASE + offset;
+
+ msg->msg_addr_low = lower_16_bits(addr);
+ msg->msg_addr_high = upper_16_bits(addr);
+}
+
+/**
+ * ice_read_quad_reg_e822 - Read a PHY quad register
+ * @hw: pointer to the HW struct
+ * @quad: quad to read from
+ * @offset: quad register offset to read
+ * @val: on return, the contents read from the quad
+ *
+ * Read a quad register over the device sideband queue. Quad registers are
+ * shared between multiple PHYs.
+ */
+int
+ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val)
+{
+ struct ice_sbq_msg_input msg = {0};
+ int err;
+
+ if (quad >= ICE_MAX_QUAD)
+ return -EINVAL;
+
+ ice_fill_quad_msg_e822(&msg, quad, offset);
+ msg.opcode = ice_sbq_msg_rd;
+
+ err = ice_sbq_rw_reg(hw, &msg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+ err);
+ return err;
+ }
+
+ *val = msg.data;
+
+ return 0;
+}
+
+/**
+ * ice_write_quad_reg_e822 - Write a PHY quad register
+ * @hw: pointer to the HW struct
+ * @quad: quad to write to
+ * @offset: quad register offset to write
+ * @val: The value to write to the register
+ *
+ * Write a quad register over the device sideband queue. Quad registers are
+ * shared between multiple PHYs.
+ */
+int
+ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val)
+{
+ struct ice_sbq_msg_input msg = {0};
+ int err;
+
+ if (quad >= ICE_MAX_QUAD)
+ return -EINVAL;
+
+ ice_fill_quad_msg_e822(&msg, quad, offset);
+ msg.opcode = ice_sbq_msg_wr;
+ msg.data = val;
+
+ err = ice_sbq_rw_reg(hw, &msg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_read_phy_tstamp_e822 - Read a PHY timestamp out of the quad block
+ * @hw: pointer to the HW struct
+ * @quad: the quad to read from
+ * @idx: the timestamp index to read
+ * @tstamp: on return, the 40bit timestamp value
+ *
+ * Read a 40bit timestamp value out of the two associated registers in the
+ * quad memory block that is shared between the internal PHYs of the E822
+ * family of devices.
+ */
+static int
+ice_read_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx, u64 *tstamp)
+{
+ u16 lo_addr, hi_addr;
+ u32 lo, hi;
+ int err;
+
+ lo_addr = (u16)TS_L(Q_REG_TX_MEMORY_BANK_START, idx);
+ hi_addr = (u16)TS_H(Q_REG_TX_MEMORY_BANK_START, idx);
+
+ err = ice_read_quad_reg_e822(hw, quad, lo_addr, &lo);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read low PTP timestamp register, err %d\n",
+ err);
+ return err;
+ }
+
+ err = ice_read_quad_reg_e822(hw, quad, hi_addr, &hi);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read high PTP timestamp register, err %d\n",
+ err);
+ return err;
+ }
+
+ /* For E822 based internal PHYs, the timestamp is reported with the
+ * lower 8 bits in the low register, and the upper 32 bits in the high
+ * register.
+ */
+ *tstamp = ((u64)hi) << TS_PHY_HIGH_S | ((u64)lo & TS_PHY_LOW_M);
+
+ return 0;
+}
+
+/**
+ * ice_clear_phy_tstamp_e822 - Clear a timestamp from the quad block
+ * @hw: pointer to the HW struct
+ * @quad: the quad to read from
+ * @idx: the timestamp index to reset
+ *
+ * Clear a timestamp, resetting its valid bit, from the PHY quad block that is
+ * shared between the internal PHYs on the E822 devices.
+ */
+static int
+ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx)
+{
+ u16 lo_addr, hi_addr;
+ int err;
+
+ lo_addr = (u16)TS_L(Q_REG_TX_MEMORY_BANK_START, idx);
+ hi_addr = (u16)TS_H(Q_REG_TX_MEMORY_BANK_START, idx);
+
+ err = ice_write_quad_reg_e822(hw, quad, lo_addr, 0);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n",
+ err);
+ return err;
+ }
+
+ err = ice_write_quad_reg_e822(hw, quad, hi_addr, 0);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_read_cgu_reg_e822 - Read a CGU register
+ * @hw: pointer to the HW struct
+ * @addr: Register address to read
+ * @val: storage for register value read
+ *
+ * Read the contents of a register of the Clock Generation Unit. Only
+ * applicable to E822 devices.
+ */
+static int
+ice_read_cgu_reg_e822(struct ice_hw *hw, u32 addr, u32 *val)
+{
+ struct ice_sbq_msg_input cgu_msg;
+ int err;
+
+ cgu_msg.opcode = ice_sbq_msg_rd;
+ cgu_msg.dest_dev = cgu;
+ cgu_msg.msg_addr_low = addr;
+ cgu_msg.msg_addr_high = 0x0;
+
+ err = ice_sbq_rw_reg(hw, &cgu_msg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read CGU register 0x%04x, err %d\n",
+ addr, err);
+ return err;
+ }
+
+ *val = cgu_msg.data;
+
+ return err;
+}
+
+/**
+ * ice_write_cgu_reg_e822 - Write a CGU register
+ * @hw: pointer to the HW struct
+ * @addr: Register address to write
+ * @val: value to write into the register
+ *
+ * Write the specified value to a register of the Clock Generation Unit. Only
+ * applicable to E822 devices.
+ */
+static int
+ice_write_cgu_reg_e822(struct ice_hw *hw, u32 addr, u32 val)
+{
+ struct ice_sbq_msg_input cgu_msg;
+ int err;
+
+ cgu_msg.opcode = ice_sbq_msg_wr;
+ cgu_msg.dest_dev = cgu;
+ cgu_msg.msg_addr_low = addr;
+ cgu_msg.msg_addr_high = 0x0;
+ cgu_msg.data = val;
+
+ err = ice_sbq_rw_reg(hw, &cgu_msg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write CGU register 0x%04x, err %d\n",
+ addr, err);
+ return err;
+ }
+
+ return err;
+}
+
+/**
+ * ice_clk_freq_str - Convert time_ref_freq to string
+ * @clk_freq: Clock frequency
+ *
+ * Convert the specified TIME_REF clock frequency to a string.
+ */
+static const char *ice_clk_freq_str(u8 clk_freq)
+{
+ switch ((enum ice_time_ref_freq)clk_freq) {
+ case ICE_TIME_REF_FREQ_25_000:
+ return "25 MHz";
+ case ICE_TIME_REF_FREQ_122_880:
+ return "122.88 MHz";
+ case ICE_TIME_REF_FREQ_125_000:
+ return "125 MHz";
+ case ICE_TIME_REF_FREQ_153_600:
+ return "153.6 MHz";
+ case ICE_TIME_REF_FREQ_156_250:
+ return "156.25 MHz";
+ case ICE_TIME_REF_FREQ_245_760:
+ return "245.76 MHz";
+ default:
+ return "Unknown";
+ }
+}
+
+/**
+ * ice_clk_src_str - Convert time_ref_src to string
+ * @clk_src: Clock source
+ *
+ * Convert the specified clock source to its string name.
+ */
+static const char *ice_clk_src_str(u8 clk_src)
+{
+ switch ((enum ice_clk_src)clk_src) {
+ case ICE_CLK_SRC_TCX0:
+ return "TCX0";
+ case ICE_CLK_SRC_TIME_REF:
+ return "TIME_REF";
+ default:
+ return "Unknown";
+ }
+}
+
+/**
+ * ice_cfg_cgu_pll_e822 - Configure the Clock Generation Unit
+ * @hw: pointer to the HW struct
+ * @clk_freq: Clock frequency to program
+ * @clk_src: Clock source to select (TIME_REF, or TCX0)
+ *
+ * Configure the Clock Generation Unit with the desired clock frequency and
+ * time reference, enabling the PLL which drives the PTP hardware clock.
+ */
+static int
+ice_cfg_cgu_pll_e822(struct ice_hw *hw, enum ice_time_ref_freq clk_freq,
+ enum ice_clk_src clk_src)
+{
+ union tspll_ro_bwm_lf bwm_lf;
+ union nac_cgu_dword19 dw19;
+ union nac_cgu_dword22 dw22;
+ union nac_cgu_dword24 dw24;
+ union nac_cgu_dword9 dw9;
+ int err;
+
+ if (clk_freq >= NUM_ICE_TIME_REF_FREQ) {
+ dev_warn(ice_hw_to_dev(hw), "Invalid TIME_REF frequency %u\n",
+ clk_freq);
+ return -EINVAL;
+ }
+
+ if (clk_src >= NUM_ICE_CLK_SRC) {
+ dev_warn(ice_hw_to_dev(hw), "Invalid clock source %u\n",
+ clk_src);
+ return -EINVAL;
+ }
+
+ if (clk_src == ICE_CLK_SRC_TCX0 &&
+ clk_freq != ICE_TIME_REF_FREQ_25_000) {
+ dev_warn(ice_hw_to_dev(hw),
+ "TCX0 only supports 25 MHz frequency\n");
+ return -EINVAL;
+ }
+
+ err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD9, &dw9.val);
+ if (err)
+ return err;
+
+ err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD24, &dw24.val);
+ if (err)
+ return err;
+
+ err = ice_read_cgu_reg_e822(hw, TSPLL_RO_BWM_LF, &bwm_lf.val);
+ if (err)
+ return err;
+
+ /* Log the current clock configuration */
+ ice_debug(hw, ICE_DBG_PTP, "Current CGU configuration -- %s, clk_src %s, clk_freq %s, PLL %s\n",
+ dw24.field.ts_pll_enable ? "enabled" : "disabled",
+ ice_clk_src_str(dw24.field.time_ref_sel),
+ ice_clk_freq_str(dw9.field.time_ref_freq_sel),
+ bwm_lf.field.plllock_true_lock_cri ? "locked" : "unlocked");
+
+ /* Disable the PLL before changing the clock source or frequency */
+ if (dw24.field.ts_pll_enable) {
+ dw24.field.ts_pll_enable = 0;
+
+ err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD24, dw24.val);
+ if (err)
+ return err;
+ }
+
+ /* Set the frequency */
+ dw9.field.time_ref_freq_sel = clk_freq;
+ err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD9, dw9.val);
+ if (err)
+ return err;
+
+ /* Configure the TS PLL feedback divisor */
+ err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD19, &dw19.val);
+ if (err)
+ return err;
+
+ dw19.field.tspll_fbdiv_intgr = e822_cgu_params[clk_freq].feedback_div;
+ dw19.field.tspll_ndivratio = 1;
+
+ err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD19, dw19.val);
+ if (err)
+ return err;
+
+ /* Configure the TS PLL post divisor */
+ err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD22, &dw22.val);
+ if (err)
+ return err;
+
+ dw22.field.time1588clk_div = e822_cgu_params[clk_freq].post_pll_div;
+ dw22.field.time1588clk_sel_div2 = 0;
+
+ err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD22, dw22.val);
+ if (err)
+ return err;
+
+ /* Configure the TS PLL pre divisor and clock source */
+ err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD24, &dw24.val);
+ if (err)
+ return err;
+
+ dw24.field.ref1588_ck_div = e822_cgu_params[clk_freq].refclk_pre_div;
+ dw24.field.tspll_fbdiv_frac = e822_cgu_params[clk_freq].frac_n_div;
+ dw24.field.time_ref_sel = clk_src;
+
+ err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD24, dw24.val);
+ if (err)
+ return err;
+
+ /* Finally, enable the PLL */
+ dw24.field.ts_pll_enable = 1;
+
+ err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD24, dw24.val);
+ if (err)
+ return err;
+
+ /* Wait to verify if the PLL locks */
+ usleep_range(1000, 5000);
+
+ err = ice_read_cgu_reg_e822(hw, TSPLL_RO_BWM_LF, &bwm_lf.val);
+ if (err)
+ return err;
+
+ if (!bwm_lf.field.plllock_true_lock_cri) {
+ dev_warn(ice_hw_to_dev(hw), "CGU PLL failed to lock\n");
+ return -EBUSY;
+ }
+
+ /* Log the current clock configuration */
+ ice_debug(hw, ICE_DBG_PTP, "New CGU configuration -- %s, clk_src %s, clk_freq %s, PLL %s\n",
+ dw24.field.ts_pll_enable ? "enabled" : "disabled",
+ ice_clk_src_str(dw24.field.time_ref_sel),
+ ice_clk_freq_str(dw9.field.time_ref_freq_sel),
+ bwm_lf.field.plllock_true_lock_cri ? "locked" : "unlocked");
+
+ return 0;
+}
+
+/**
+ * ice_init_cgu_e822 - Initialize CGU with settings from firmware
+ * @hw: pointer to the HW structure
+ *
+ * Initialize the Clock Generation Unit of the E822 device.
+ */
+static int ice_init_cgu_e822(struct ice_hw *hw)
+{
+ struct ice_ts_func_info *ts_info = &hw->func_caps.ts_func_info;
+ union tspll_cntr_bist_settings cntr_bist;
+ int err;
+
+ err = ice_read_cgu_reg_e822(hw, TSPLL_CNTR_BIST_SETTINGS,
+ &cntr_bist.val);
+ if (err)
+ return err;
+
+ /* Disable sticky lock detection so the reported lock error is accurate */
+ cntr_bist.field.i_plllock_sel_0 = 0;
+ cntr_bist.field.i_plllock_sel_1 = 0;
+
+ err = ice_write_cgu_reg_e822(hw, TSPLL_CNTR_BIST_SETTINGS,
+ cntr_bist.val);
+ if (err)
+ return err;
+
+ /* Configure the CGU PLL using the parameters from the function
+ * capabilities.
+ */
+ err = ice_cfg_cgu_pll_e822(hw, ts_info->time_ref,
+ (enum ice_clk_src)ts_info->clk_src);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_ptp_set_vernier_wl - Set the window length for vernier calibration
+ * @hw: pointer to the HW struct
+ *
+ * Set the window length used for the vernier port calibration process.
+ */
+static int ice_ptp_set_vernier_wl(struct ice_hw *hw)
+{
+ u8 port;
+
+ for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+ int err;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_WL,
+ PTP_VERNIER_WL);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to set vernier window length for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_init_phc_e822 - Perform E822 specific PHC initialization
+ * @hw: pointer to HW struct
+ *
+ * Perform PHC initialization steps specific to E822 devices.
+ */
+static int ice_ptp_init_phc_e822(struct ice_hw *hw)
+{
+ int err;
+ u32 regval;
+
+ /* Enable reading switch and PHY registers over the sideband queue */
+#define PF_SB_REM_DEV_CTL_SWITCH_READ BIT(1)
+#define PF_SB_REM_DEV_CTL_PHY0 BIT(2)
+ regval = rd32(hw, PF_SB_REM_DEV_CTL);
+ regval |= (PF_SB_REM_DEV_CTL_SWITCH_READ |
+ PF_SB_REM_DEV_CTL_PHY0);
+ wr32(hw, PF_SB_REM_DEV_CTL, regval);
+
+ /* Initialize the Clock Generation Unit */
+ err = ice_init_cgu_e822(hw);
+ if (err)
+ return err;
+
+ /* Set window length for all the ports */
+ return ice_ptp_set_vernier_wl(hw);
+}
+
+/**
+ * ice_ptp_prep_phy_time_e822 - Prepare PHY port with initial time
+ * @hw: pointer to the HW struct
+ * @time: Time to initialize the PHY port clocks to
+ *
+ * Program the PHY port registers with a new initial time value. The port
+ * clock will be initialized once the driver issues an INIT_TIME sync
+ * command. The time value is the upper 32 bits of the PHY timer, usually in
+ * units of nominal nanoseconds.
+ */
+static int
+ice_ptp_prep_phy_time_e822(struct ice_hw *hw, u32 time)
+{
+ u64 phy_time;
+ u8 port;
+ int err;
+
+ /* The time represents the upper 32 bits of the PHY timer, so we need
+ * to shift to account for this when programming.
+ */
+ phy_time = (u64)time << 32;
+
+ for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+ /* Tx case */
+ err = ice_write_64b_phy_reg_e822(hw, port,
+ P_REG_TX_TIMER_INC_PRE_L,
+ phy_time);
+ if (err)
+ goto exit_err;
+
+ /* Rx case */
+ err = ice_write_64b_phy_reg_e822(hw, port,
+ P_REG_RX_TIMER_INC_PRE_L,
+ phy_time);
+ if (err)
+ goto exit_err;
+ }
+
+ return 0;
+
+exit_err:
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write init time for port %u, err %d\n",
+ port, err);
+
+ return err;
+}
+
+/**
+ * ice_ptp_prep_port_adj_e822 - Prepare a single port for time adjust
+ * @hw: pointer to HW struct
+ * @port: Port number to be programmed
+ * @time: time in cycles to adjust the port Tx and Rx clocks
+ *
+ * Program the port for an atomic adjustment by writing the Tx and Rx timer
+ * registers. The atomic adjustment won't be completed until the driver issues
+ * an ADJ_TIME command.
+ *
+ * Note that time is not in units of nanoseconds. It is in clock time
+ * including the lower sub-nanosecond portion of the port timer.
+ *
+ * Negative adjustments are supported using 2s complement arithmetic.
+ */
+int
+ice_ptp_prep_port_adj_e822(struct ice_hw *hw, u8 port, s64 time)
+{
+ u32 l_time, u_time;
+ int err;
+
+ l_time = lower_32_bits(time);
+ u_time = upper_32_bits(time);
+
+ /* Tx case */
+ err = ice_write_phy_reg_e822(hw, port, P_REG_TX_TIMER_INC_PRE_L,
+ l_time);
+ if (err)
+ goto exit_err;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_TX_TIMER_INC_PRE_U,
+ u_time);
+ if (err)
+ goto exit_err;
+
+ /* Rx case */
+ err = ice_write_phy_reg_e822(hw, port, P_REG_RX_TIMER_INC_PRE_L,
+ l_time);
+ if (err)
+ goto exit_err;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_RX_TIMER_INC_PRE_U,
+ u_time);
+ if (err)
+ goto exit_err;
+
+ return 0;
+
+exit_err:
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write time adjust for port %u, err %d\n",
+ port, err);
+ return err;
+}
+
+/**
+ * ice_ptp_prep_phy_adj_e822 - Prep PHY ports for a time adjustment
+ * @hw: pointer to HW struct
+ * @adj: adjustment in nanoseconds
+ *
+ * Prepare the PHY ports for an atomic time adjustment by programming the PHY
+ * Tx and Rx port registers. The actual adjustment is completed by issuing an
+ * ADJ_TIME or ADJ_TIME_AT_TIME sync command.
+ */
+static int
+ice_ptp_prep_phy_adj_e822(struct ice_hw *hw, s32 adj)
+{
+ s64 cycles;
+ u8 port;
+
+ /* The port clock supports adjustment of the sub-nanosecond portion of
+ * the clock. We shift the provided adjustment in nanoseconds to
+ * calculate the appropriate adjustment to program into the PHY ports.
+ */
+ if (adj > 0)
+ cycles = (s64)adj << 32;
+ else
+ cycles = -(((s64)-adj) << 32);
+
+ for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+ int err;
+
+ err = ice_ptp_prep_port_adj_e822(hw, port, cycles);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_prep_phy_incval_e822 - Prepare PHY ports for time adjustment
+ * @hw: pointer to HW struct
+ * @incval: new increment value to prepare
+ *
+ * Prepare each of the PHY ports for a new increment value by programming the
+ * port's TIMETUS registers. The new increment value will be updated after
+ * issuing an INIT_INCVAL command.
+ */
+static int
+ice_ptp_prep_phy_incval_e822(struct ice_hw *hw, u64 incval)
+{
+ int err;
+ u8 port;
+
+ for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+ err = ice_write_40b_phy_reg_e822(hw, port, P_REG_TIMETUS_L,
+ incval);
+ if (err)
+ goto exit_err;
+ }
+
+ return 0;
+
+exit_err:
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write incval for port %u, err %d\n",
+ port, err);
+
+ return err;
+}
+
+/**
+ * ice_ptp_read_port_capture - Read a port's local time capture
+ * @hw: pointer to HW struct
+ * @port: Port number to read
+ * @tx_ts: on return, the Tx port time capture
+ * @rx_ts: on return, the Rx port time capture
+ *
+ * Read the port's Tx and Rx local time capture values.
+ *
+ * Note this has no equivalent for the E810 devices.
+ */
+static int
+ice_ptp_read_port_capture(struct ice_hw *hw, u8 port, u64 *tx_ts, u64 *rx_ts)
+{
+ int err;
+
+ /* Tx case */
+ err = ice_read_64b_phy_reg_e822(hw, port, P_REG_TX_CAPTURE_L, tx_ts);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read REG_TX_CAPTURE, err %d\n",
+ err);
+ return err;
+ }
+
+ ice_debug(hw, ICE_DBG_PTP, "tx_init = 0x%016llx\n",
+ (unsigned long long)*tx_ts);
+
+ /* Rx case */
+ err = ice_read_64b_phy_reg_e822(hw, port, P_REG_RX_CAPTURE_L, rx_ts);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_CAPTURE, err %d\n",
+ err);
+ return err;
+ }
+
+ ice_debug(hw, ICE_DBG_PTP, "rx_init = 0x%016llx\n",
+ (unsigned long long)*rx_ts);
+
+ return 0;
+}
+
+/**
+ * ice_ptp_write_port_cmd_e822 - Prepare a single PHY port for a timer command
+ * @hw: pointer to HW struct
+ * @port: the port to send the command to
+ * @cmd: Command to be sent to the port
+ *
+ * Prepare the requested port for an upcoming timer sync command.
+ *
+ * Do not use this function directly. If you want to configure exactly one
+ * port, use ice_ptp_one_port_cmd() instead.
+ */
+static int
+ice_ptp_write_port_cmd_e822(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd)
+{
+ u32 cmd_val, val;
+ u8 tmr_idx;
+ int err;
+
+ tmr_idx = ice_get_ptp_src_clock_index(hw);
+ cmd_val = tmr_idx << SEL_PHY_SRC;
+ switch (cmd) {
+ case INIT_TIME:
+ cmd_val |= PHY_CMD_INIT_TIME;
+ break;
+ case INIT_INCVAL:
+ cmd_val |= PHY_CMD_INIT_INCVAL;
+ break;
+ case ADJ_TIME:
+ cmd_val |= PHY_CMD_ADJ_TIME;
+ break;
+ case READ_TIME:
+ cmd_val |= PHY_CMD_READ_TIME;
+ break;
+ case ADJ_TIME_AT_TIME:
+ cmd_val |= PHY_CMD_ADJ_TIME_AT_TIME;
+ break;
+ case ICE_PTP_NOP:
+ break;
+ }
+
+ /* Tx case */
+ /* Read, modify, write */
+ err = ice_read_phy_reg_e822(hw, port, P_REG_TX_TMR_CMD, &val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_TMR_CMD, err %d\n",
+ err);
+ return err;
+ }
+
+ /* Modify necessary bits only and perform write */
+ val &= ~TS_CMD_MASK;
+ val |= cmd_val;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_TX_TMR_CMD, val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write back TX_TMR_CMD, err %d\n",
+ err);
+ return err;
+ }
+
+ /* Rx case */
+ /* Read, modify, write */
+ err = ice_read_phy_reg_e822(hw, port, P_REG_RX_TMR_CMD, &val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_TMR_CMD, err %d\n",
+ err);
+ return err;
+ }
+
+ /* Modify necessary bits only and perform write */
+ val &= ~TS_CMD_MASK;
+ val |= cmd_val;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_RX_TMR_CMD, val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write back RX_TMR_CMD, err %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_one_port_cmd - Prepare one port for a timer command
+ * @hw: pointer to the HW struct
+ * @configured_port: the port to configure with configured_cmd
+ * @configured_cmd: timer command to prepare on the configured_port
+ *
+ * Prepare the configured_port for the configured_cmd, and prepare all other
+ * ports for ICE_PTP_NOP. This causes the configured_port to execute the
+ * desired command while all other ports perform no operation.
+ */
+static int
+ice_ptp_one_port_cmd(struct ice_hw *hw, u8 configured_port,
+ enum ice_ptp_tmr_cmd configured_cmd)
+{
+ u8 port;
+
+ for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+ enum ice_ptp_tmr_cmd cmd;
+ int err;
+
+ if (port == configured_port)
+ cmd = configured_cmd;
+ else
+ cmd = ICE_PTP_NOP;
+
+ err = ice_ptp_write_port_cmd_e822(hw, port, cmd);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_port_cmd_e822 - Prepare all ports for a timer command
+ * @hw: pointer to the HW struct
+ * @cmd: timer command to prepare
+ *
+ * Prepare all ports connected to this device for an upcoming timer sync
+ * command.
+ */
+static int
+ice_ptp_port_cmd_e822(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+ u8 port;
+
+ for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+ int err;
+
+ err = ice_ptp_write_port_cmd_e822(hw, port, cmd);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* E822 Vernier calibration functions
+ *
+ * The following functions are used as part of the vernier calibration of
+ * a port. This calibration increases the precision of the timestamps on the
+ * port.
+ */
+
+/**
+ * ice_phy_get_speed_and_fec_e822 - Get link speed and FEC based on serdes mode
+ * @hw: pointer to HW struct
+ * @port: the port to read from
+ * @link_out: if non-NULL, holds link speed on success
+ * @fec_out: if non-NULL, holds FEC algorithm on success
+ *
+ * Read the serdes data for the PHY port and extract the link speed and FEC
+ * algorithm.
+ */
+static int
+ice_phy_get_speed_and_fec_e822(struct ice_hw *hw, u8 port,
+ enum ice_ptp_link_spd *link_out,
+ enum ice_ptp_fec_mode *fec_out)
+{
+ enum ice_ptp_link_spd link;
+ enum ice_ptp_fec_mode fec;
+ u32 serdes;
+ int err;
+
+ err = ice_read_phy_reg_e822(hw, port, P_REG_LINK_SPEED, &serdes);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read serdes info\n");
+ return err;
+ }
+
+ /* Determine the FEC algorithm */
+ fec = (enum ice_ptp_fec_mode)P_REG_LINK_SPEED_FEC_MODE(serdes);
+
+ serdes &= P_REG_LINK_SPEED_SERDES_M;
+
+ /* Determine the link speed */
+ if (fec == ICE_PTP_FEC_MODE_RS_FEC) {
+ switch (serdes) {
+ case ICE_PTP_SERDES_25G:
+ link = ICE_PTP_LNK_SPD_25G_RS;
+ break;
+ case ICE_PTP_SERDES_50G:
+ link = ICE_PTP_LNK_SPD_50G_RS;
+ break;
+ case ICE_PTP_SERDES_100G:
+ link = ICE_PTP_LNK_SPD_100G_RS;
+ break;
+ default:
+ return -EIO;
+ }
+ } else {
+ switch (serdes) {
+ case ICE_PTP_SERDES_1G:
+ link = ICE_PTP_LNK_SPD_1G;
+ break;
+ case ICE_PTP_SERDES_10G:
+ link = ICE_PTP_LNK_SPD_10G;
+ break;
+ case ICE_PTP_SERDES_25G:
+ link = ICE_PTP_LNK_SPD_25G;
+ break;
+ case ICE_PTP_SERDES_40G:
+ link = ICE_PTP_LNK_SPD_40G;
+ break;
+ case ICE_PTP_SERDES_50G:
+ link = ICE_PTP_LNK_SPD_50G;
+ break;
+ default:
+ return -EIO;
+ }
+ }
+
+ if (link_out)
+ *link_out = link;
+ if (fec_out)
+ *fec_out = fec;
+
+ return 0;
+}
+
+/**
+ * ice_phy_cfg_lane_e822 - Configure PHY quad for single/multi-lane timestamp
+ * @hw: pointer to HW struct
+ * @port: the port to configure the quad for
+ */
+static void ice_phy_cfg_lane_e822(struct ice_hw *hw, u8 port)
+{
+ enum ice_ptp_link_spd link_spd;
+ int err;
+ u32 val;
+ u8 quad;
+
+ err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, NULL);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to get PHY link speed, err %d\n",
+ err);
+ return;
+ }
+
+ quad = port / ICE_PORTS_PER_QUAD;
+
+ err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, &val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEM_GLB_CFG, err %d\n",
+ err);
+ return;
+ }
+
+ if (link_spd >= ICE_PTP_LNK_SPD_40G)
+ val &= ~Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_M;
+ else
+ val |= Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_M;
+
+ err = ice_write_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write back TX_MEM_GBL_CFG, err %d\n",
+ err);
+ return;
+ }
+}
+
+/**
+ * ice_phy_cfg_uix_e822 - Configure Serdes UI to TU conversion for E822
+ * @hw: pointer to the HW structure
+ * @port: the port to configure
+ *
+ * Program the conversion ratio of Serdes clock "unit intervals" (UIs) to PHC
+ * hardware clock time units (TUs). That is, determine the number of TUs per
+ * serdes unit interval, and program the UIX registers with this conversion.
+ *
+ * This conversion is used as part of the calibration process when determining
+ * the additional error of a timestamp vs the real time of transmission or
+ * receipt of the packet.
+ *
+ * Hardware uses the number of TUs per 66 UIs, written to the UIX registers
+ * for the two main serdes clock rates, 10G/40G and 25G/100G.
+ *
+ * To calculate the conversion ratio, we use the following facts:
+ *
+ * a) the clock frequency in Hz (cycles per second)
+ * b) the number of TUs per cycle (the increment value of the clock)
+ * c) 1 second per 1 billion nanoseconds
+ * d) the duration of 66 UIs in nanoseconds
+ *
+ * Given these facts, we can use the following table to work out what ratios
+ * to multiply in order to get the number of TUs per 66 UIs:
+ *
+ * cycles | 1 second | incval (TUs) | nanoseconds
+ * -------+--------------+--------------+-------------
+ * second | 1 billion ns | cycle | 66 UIs
+ *
+ * To perform the multiplication using integers without too much loss of
+ * precision, we can use the following equation:
+ *
+ * (freq * incval * 6600 LINE_UI) / (100 * 1 billion)
+ *
+ * We scale up to using 6600 UI instead of 66 in order to avoid fractional
+ * nanosecond UIs (66 UI at 10G/40G is 6.4 ns)
+ *
+ * The increment value has a maximum expected range of about 34 bits, while
+ * the frequency value is about 29 bits. Multiplying these values shouldn't
+ * overflow the 64 bits. However, we must then further multiply them again by
+ * the Serdes unit interval duration. To avoid overflow here, we split the
+ * overall divide by 1e11 into a divide by 256 (shift down by 8 bits) and
+ * a divide by 390,625,000. This does lose some precision, but avoids
+ * miscalculation due to arithmetic overflow.
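+ *
+ * As a rough numeric illustration (assuming the nominal case where
+ * freq * incval is ~2^32 * 1e9 TUs per second): tu_per_sec >> 8 is
+ * ~1.678e16, so the 10G/40G ratio is (1.678e16 * 640) / 390625000,
+ * or ~2.749e10 TUs per 66 UIs, consistent with 6.4 ns * 2^32 TUs/ns.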
+ */
+static int ice_phy_cfg_uix_e822(struct ice_hw *hw, u8 port)
+{
+ u64 cur_freq, clk_incval, tu_per_sec, uix;
+ int err;
+
+ cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw));
+ clk_incval = ice_ptp_read_src_incval(hw);
+
+ /* Calculate TUs per second divided by 256 */
+ tu_per_sec = (cur_freq * clk_incval) >> 8;
+
+#define LINE_UI_10G_40G 640 /* 6600 UIs is 640 nanoseconds at 10Gb/40Gb */
+#define LINE_UI_25G_100G 256 /* 6600 UIs is 256 nanoseconds at 25Gb/100Gb */
+
+ /* Program the 10Gb/40Gb conversion ratio */
+ uix = div_u64(tu_per_sec * LINE_UI_10G_40G, 390625000);
+
+ err = ice_write_64b_phy_reg_e822(hw, port, P_REG_UIX66_10G_40G_L,
+ uix);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write UIX66_10G_40G, err %d\n",
+ err);
+ return err;
+ }
+
+ /* Program the 25Gb/100Gb conversion ratio */
+ uix = div_u64(tu_per_sec * LINE_UI_25G_100G, 390625000);
+
+ err = ice_write_64b_phy_reg_e822(hw, port, P_REG_UIX66_25G_100G_L,
+ uix);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write UIX66_25G_100G, err %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_phy_cfg_parpcs_e822 - Configure TUs per PAR/PCS clock cycle
+ * @hw: pointer to the HW struct
+ * @port: port to configure
+ *
+ * Configure the number of TUs for the PAR and PCS clocks used as part of the
+ * timestamp calibration process. This depends on the link speed, as the PHY
+ * uses different markers depending on the speed.
+ *
+ * 1Gb/10Gb/25Gb:
+ * - Tx/Rx PAR/PCS markers
+ *
+ * 25Gb RS:
+ * - Tx/Rx Reed Solomon gearbox PAR/PCS markers
+ *
+ * 40Gb/50Gb:
+ * - Tx/Rx PAR/PCS markers
+ * - Rx Deskew PAR/PCS markers
+ *
+ * 50G RS and 100G RS:
+ * - Tx/Rx Reed Solomon gearbox PAR/PCS markers
+ * - Rx Deskew PAR/PCS markers
+ * - Tx PAR/PCS markers
+ *
+ * To calculate the conversion, we use the PHC clock frequency (cycles per
+ * second), the increment value (TUs per cycle), and the related PHY clock
+ * frequency to calculate the TUs per unit of the PHY link clock. The
+ * following table shows how the units convert:
+ *
+ * cycles | TUs | second
+ * -------+-------+--------
+ * second | cycle | cycles
+ *
+ * For each conversion register, look up the appropriate frequency from the
+ * e822 PAR/PCS table and calculate the TUs per unit of that clock. Program
+ * this to the appropriate register, preparing hardware to perform timestamp
+ * calibration to calculate the total Tx or Rx offset to adjust the timestamp
+ * in order to calibrate for the internal PHY delays.
+ *
+ * Note that the increment value ranges up to ~34 bits, and the clock
+ * frequency is ~29 bits, so multiplying them together should fit within the
+ * 64 bit arithmetic.
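+ *
+ * As a rough illustration (clock rate assumed for the example): with
+ * tu_per_sec of ~2^32 * 1e9 and a hypothetical PCS clock of
+ * 257812500 Hz, the programmed value is ~4.295e18 / 2.578125e8, or
+ * ~1.666e10 TUs per PCS clock cycle (about 3.879 ns * 2^32).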
+ */
+static int ice_phy_cfg_parpcs_e822(struct ice_hw *hw, u8 port)
+{
+ u64 cur_freq, clk_incval, tu_per_sec, phy_tus;
+ enum ice_ptp_link_spd link_spd;
+ enum ice_ptp_fec_mode fec_mode;
+ int err;
+
+ err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
+ if (err)
+ return err;
+
+ cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw));
+ clk_incval = ice_ptp_read_src_incval(hw);
+
+ /* Calculate TUs per cycle of the PHC clock */
+ tu_per_sec = cur_freq * clk_incval;
+
+ /* For each PHY conversion register, look up the appropriate link
+ * speed frequency and determine the TUs per that clock's cycle time.
+ * Split this into a high and low value and then program the
+ * appropriate register. If that link speed does not use the
+ * associated register, write zeros to clear it instead.
+ */
+
+ /* P_REG_PAR_TX_TUS */
+ if (e822_vernier[link_spd].tx_par_clk)
+ phy_tus = div_u64(tu_per_sec,
+ e822_vernier[link_spd].tx_par_clk);
+ else
+ phy_tus = 0;
+
+ err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PAR_TX_TUS_L,
+ phy_tus);
+ if (err)
+ return err;
+
+ /* P_REG_PAR_RX_TUS */
+ if (e822_vernier[link_spd].rx_par_clk)
+ phy_tus = div_u64(tu_per_sec,
+ e822_vernier[link_spd].rx_par_clk);
+ else
+ phy_tus = 0;
+
+ err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PAR_RX_TUS_L,
+ phy_tus);
+ if (err)
+ return err;
+
+ /* P_REG_PCS_TX_TUS */
+ if (e822_vernier[link_spd].tx_pcs_clk)
+ phy_tus = div_u64(tu_per_sec,
+ e822_vernier[link_spd].tx_pcs_clk);
+ else
+ phy_tus = 0;
+
+ err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PCS_TX_TUS_L,
+ phy_tus);
+ if (err)
+ return err;
+
+ /* P_REG_PCS_RX_TUS */
+ if (e822_vernier[link_spd].rx_pcs_clk)
+ phy_tus = div_u64(tu_per_sec,
+ e822_vernier[link_spd].rx_pcs_clk);
+ else
+ phy_tus = 0;
+
+ err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PCS_RX_TUS_L,
+ phy_tus);
+ if (err)
+ return err;
+
+ /* P_REG_DESK_PAR_TX_TUS */
+ if (e822_vernier[link_spd].tx_desk_rsgb_par)
+ phy_tus = div_u64(tu_per_sec,
+ e822_vernier[link_spd].tx_desk_rsgb_par);
+ else
+ phy_tus = 0;
+
+ err = ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PAR_TX_TUS_L,
+ phy_tus);
+ if (err)
+ return err;
+
+ /* P_REG_DESK_PAR_RX_TUS */
+ if (e822_vernier[link_spd].rx_desk_rsgb_par)
+ phy_tus = div_u64(tu_per_sec,
+ e822_vernier[link_spd].rx_desk_rsgb_par);
+ else
+ phy_tus = 0;
+
+ err = ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PAR_RX_TUS_L,
+ phy_tus);
+ if (err)
+ return err;
+
+ /* P_REG_DESK_PCS_TX_TUS */
+ if (e822_vernier[link_spd].tx_desk_rsgb_pcs)
+ phy_tus = div_u64(tu_per_sec,
+ e822_vernier[link_spd].tx_desk_rsgb_pcs);
+ else
+ phy_tus = 0;
+
+ err = ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PCS_TX_TUS_L,
+ phy_tus);
+ if (err)
+ return err;
+
+ /* P_REG_DESK_PCS_RX_TUS */
+ if (e822_vernier[link_spd].rx_desk_rsgb_pcs)
+ phy_tus = div_u64(tu_per_sec,
+ e822_vernier[link_spd].rx_desk_rsgb_pcs);
+ else
+ phy_tus = 0;
+
+ return ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PCS_RX_TUS_L,
+ phy_tus);
+}
+
+/**
+ * ice_calc_fixed_tx_offset_e822 - Calculate the fixed Tx offset for a port
+ * @hw: pointer to the HW struct
+ * @link_spd: the Link speed to calculate for
+ *
+ * Calculate the fixed offset due to known static latency data.
+ */
+static u64
+ice_calc_fixed_tx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd)
+{
+ u64 cur_freq, clk_incval, tu_per_sec, fixed_offset;
+
+ cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw));
+ clk_incval = ice_ptp_read_src_incval(hw);
+
+ /* Calculate TUs per second */
+ tu_per_sec = cur_freq * clk_incval;
+
+ /* Calculate number of TUs to add for the fixed Tx latency. Since the
+ * latency measurement is in 1/100th of a nanosecond, we need to
+ * multiply by tu_per_sec and then divide by 1e11. This calculation
+ * overflows 64 bit integer arithmetic, so break it up into two
+ * divisions by 1e4 first then by 1e7.
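+	 *
+	 * For example (delay value hypothetical): a fixed delay of 4000
+	 * (40.00 ns) with tu_per_sec of ~2^32 * 1e9 gives
+	 * (4.295e18 / 1e4) * 4000 / 1e7, or ~1.718e11 TUs, which matches
+	 * 40 ns * 2^32 TUs/ns.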
+ */
+ fixed_offset = div_u64(tu_per_sec, 10000);
+ fixed_offset *= e822_vernier[link_spd].tx_fixed_delay;
+ fixed_offset = div_u64(fixed_offset, 10000000);
+
+ return fixed_offset;
+}
+
+/**
+ * ice_phy_cfg_tx_offset_e822 - Configure total Tx timestamp offset
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to configure
+ *
+ * Program the P_REG_TOTAL_TX_OFFSET register with the total number of TUs to
+ * adjust Tx timestamps by. This is calculated by combining some known static
+ * latency along with the Vernier offset computations done by hardware.
+ *
+ * This function must be called only after the offset registers are valid,
+ * i.e. after the Vernier calibration wait has passed, to ensure that the PHY
+ * has measured the offset.
+ *
+ * To avoid overflow, when calculating the offset based on the known static
+ * latency values, we use measurements in 1/100th of a nanosecond, and divide
+ * the TUs per second up front. This avoids overflow while allowing
+ * calculation of the adjustment using integer arithmetic.
+ */
+static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port)
+{
+ enum ice_ptp_link_spd link_spd;
+ enum ice_ptp_fec_mode fec_mode;
+ u64 total_offset, val;
+ int err;
+
+ err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
+ if (err)
+ return err;
+
+ total_offset = ice_calc_fixed_tx_offset_e822(hw, link_spd);
+
+ /* Read the first Vernier offset from the PHY register and add it to
+ * the total offset.
+ */
+ if (link_spd == ICE_PTP_LNK_SPD_1G ||
+ link_spd == ICE_PTP_LNK_SPD_10G ||
+ link_spd == ICE_PTP_LNK_SPD_25G ||
+ link_spd == ICE_PTP_LNK_SPD_25G_RS ||
+ link_spd == ICE_PTP_LNK_SPD_40G ||
+ link_spd == ICE_PTP_LNK_SPD_50G) {
+ err = ice_read_64b_phy_reg_e822(hw, port,
+ P_REG_PAR_PCS_TX_OFFSET_L,
+ &val);
+ if (err)
+ return err;
+
+ total_offset += val;
+ }
+
+ /* For Tx, we only need to use the second Vernier offset for
+ * multi-lane link speeds with RS-FEC. The lanes will always be
+ * aligned.
+ */
+ if (link_spd == ICE_PTP_LNK_SPD_50G_RS ||
+ link_spd == ICE_PTP_LNK_SPD_100G_RS) {
+ err = ice_read_64b_phy_reg_e822(hw, port,
+ P_REG_PAR_TX_TIME_L,
+ &val);
+ if (err)
+ return err;
+
+ total_offset += val;
+ }
+
+ /* Now that the total offset has been calculated, program it to the
+ * PHY and indicate that the Tx offset is ready. After this,
+ * timestamps will be enabled.
+ */
+ err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_TX_OFFSET_L,
+ total_offset);
+ if (err)
+ return err;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 1);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_phy_cfg_fixed_tx_offset_e822 - Configure Tx offset for bypass mode
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to configure
+ *
+ * Calculate and program the fixed Tx offset, and indicate that the offset is
+ * ready. This can be used when operating in bypass mode.
+ */
+static int
+ice_phy_cfg_fixed_tx_offset_e822(struct ice_hw *hw, u8 port)
+{
+ enum ice_ptp_link_spd link_spd;
+ enum ice_ptp_fec_mode fec_mode;
+ u64 total_offset;
+ int err;
+
+ err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
+ if (err)
+ return err;
+
+ total_offset = ice_calc_fixed_tx_offset_e822(hw, link_spd);
+
+ /* Program the fixed Tx offset into the P_REG_TOTAL_TX_OFFSET_L
+ * register, then indicate that the Tx offset is ready. After this,
+ * timestamps will be enabled.
+ *
+ * Note that this skips including the more precise offsets generated
+ * by the Vernier calibration.
+ */
+ err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_TX_OFFSET_L,
+ total_offset);
+ if (err)
+ return err;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 1);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_phy_calc_pmd_adj_e822 - Calculate PMD adjustment for Rx
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to adjust for
+ * @link_spd: the current link speed of the PHY
+ * @fec_mode: the current FEC mode of the PHY
+ * @pmd_adj: on return, the amount to adjust the Rx total offset by
+ *
+ * Calculates the adjustment to Rx timestamps due to PMD alignment in the PHY.
+ * This varies by link speed and FEC mode. The value calculated accounts for
+ * various delays caused when receiving a packet.
+ */
+static int
+ice_phy_calc_pmd_adj_e822(struct ice_hw *hw, u8 port,
+ enum ice_ptp_link_spd link_spd,
+ enum ice_ptp_fec_mode fec_mode, u64 *pmd_adj)
+{
+ u64 cur_freq, clk_incval, tu_per_sec, mult, adj;
+ u8 pmd_align;
+ u32 val;
+ int err;
+
+ err = ice_read_phy_reg_e822(hw, port, P_REG_PMD_ALIGNMENT, &val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read PMD alignment, err %d\n",
+ err);
+ return err;
+ }
+
+ pmd_align = (u8)val;
+
+ cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw));
+ clk_incval = ice_ptp_read_src_incval(hw);
+
+ /* Calculate TUs per second */
+ tu_per_sec = cur_freq * clk_incval;
+
+ /* The PMD alignment adjustment measurement depends on the link speed,
+ * and whether FEC is enabled. For each link speed, the alignment
+ * adjustment is calculated by dividing a value by the length of
+ * a Time Unit in nanoseconds.
+ *
+	 * 1G: align == 4 ? 10 * 0.8 : ((align + 6) % 10) * 0.8
+ * 10G: align == 65 ? 0 : (align * 0.1 * 32/33)
+ * 10G w/FEC: align * 0.1 * 32/33
+ * 25G: align == 65 ? 0 : (align * 0.4 * 32/33)
+ * 25G w/FEC: align * 0.4 * 32/33
+ * 40G: align == 65 ? 0 : (align * 0.1 * 32/33)
+ * 40G w/FEC: align * 0.1 * 32/33
+ * 50G: align == 65 ? 0 : (align * 0.4 * 32/33)
+ * 50G w/FEC: align * 0.8 * 32/33
+ *
+ * For RS-FEC, if align is < 17 then we must also add 1.6 * 32/33.
+ *
+ * To allow for calculating this value using integer arithmetic, we
+ * instead start with the number of TUs per second, (inverse of the
+ * length of a Time Unit in nanoseconds), multiply by a value based
+ * on the PMD alignment register, and then divide by the right value
+ * calculated based on the table above. To avoid integer overflow this
+ * division is broken up into a step of dividing by 125 first.
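+	 *
+	 * As an illustration (divisor value assumed for the example): at
+	 * 10G with pmd_align of 20 and a pmd_adj_divisor of 82500000, the
+	 * result is (tu_per_sec / 125) * 20 / 82500000, or ~8.33e9 TUs,
+	 * equivalent to 20 * 0.1 * 32/33 ~= 1.94 ns.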
+ */
+ if (link_spd == ICE_PTP_LNK_SPD_1G) {
+ if (pmd_align == 4)
+ mult = 10;
+ else
+ mult = (pmd_align + 6) % 10;
+ } else if (link_spd == ICE_PTP_LNK_SPD_10G ||
+ link_spd == ICE_PTP_LNK_SPD_25G ||
+ link_spd == ICE_PTP_LNK_SPD_40G ||
+ link_spd == ICE_PTP_LNK_SPD_50G) {
+ /* If Clause 74 FEC, always calculate PMD adjust */
+ if (pmd_align != 65 || fec_mode == ICE_PTP_FEC_MODE_CLAUSE74)
+ mult = pmd_align;
+ else
+ mult = 0;
+ } else if (link_spd == ICE_PTP_LNK_SPD_25G_RS ||
+ link_spd == ICE_PTP_LNK_SPD_50G_RS ||
+ link_spd == ICE_PTP_LNK_SPD_100G_RS) {
+ if (pmd_align < 17)
+ mult = pmd_align + 40;
+ else
+ mult = pmd_align;
+ } else {
+ ice_debug(hw, ICE_DBG_PTP, "Unknown link speed %d, skipping PMD adjustment\n",
+ link_spd);
+ mult = 0;
+ }
+
+ /* In some cases, there's no need to adjust for the PMD alignment */
+ if (!mult) {
+ *pmd_adj = 0;
+ return 0;
+ }
+
+ /* Calculate the adjustment by multiplying TUs per second by the
+ * appropriate multiplier and divisor. To avoid overflow, we first
+ * divide by 125, and then handle remaining divisor based on the link
+ * speed pmd_adj_divisor value.
+ */
+ adj = div_u64(tu_per_sec, 125);
+ adj *= mult;
+ adj = div_u64(adj, e822_vernier[link_spd].pmd_adj_divisor);
+
+ /* Finally, for 25G-RS and 50G-RS, a further adjustment for the Rx
+ * cycle count is necessary.
+ */
+ if (link_spd == ICE_PTP_LNK_SPD_25G_RS) {
+ u64 cycle_adj;
+ u8 rx_cycle;
+
+ err = ice_read_phy_reg_e822(hw, port, P_REG_RX_40_TO_160_CNT,
+ &val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read 25G-RS Rx cycle count, err %d\n",
+ err);
+ return err;
+ }
+
+ rx_cycle = val & P_REG_RX_40_TO_160_CNT_RXCYC_M;
+ if (rx_cycle) {
+ mult = (4 - rx_cycle) * 40;
+
+ cycle_adj = div_u64(tu_per_sec, 125);
+ cycle_adj *= mult;
+ cycle_adj = div_u64(cycle_adj, e822_vernier[link_spd].pmd_adj_divisor);
+
+ adj += cycle_adj;
+ }
+ } else if (link_spd == ICE_PTP_LNK_SPD_50G_RS) {
+ u64 cycle_adj;
+ u8 rx_cycle;
+
+ err = ice_read_phy_reg_e822(hw, port, P_REG_RX_80_TO_160_CNT,
+ &val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read 50G-RS Rx cycle count, err %d\n",
+ err);
+ return err;
+ }
+
+ rx_cycle = val & P_REG_RX_80_TO_160_CNT_RXCYC_M;
+ if (rx_cycle) {
+ mult = rx_cycle * 40;
+
+ cycle_adj = div_u64(tu_per_sec, 125);
+ cycle_adj *= mult;
+ cycle_adj = div_u64(cycle_adj, e822_vernier[link_spd].pmd_adj_divisor);
+
+ adj += cycle_adj;
+ }
+ }
+
+ /* Return the calculated adjustment */
+ *pmd_adj = adj;
+
+ return 0;
+}
+
+/**
+ * ice_calc_fixed_rx_offset_e822 - Calculate the fixed Rx offset for a port
+ * @hw: pointer to HW struct
+ * @link_spd: The Link speed to calculate for
+ *
+ * Determine the fixed Rx latency for a given link speed.
+ */
+static u64
+ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd)
+{
+ u64 cur_freq, clk_incval, tu_per_sec, fixed_offset;
+
+ cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw));
+ clk_incval = ice_ptp_read_src_incval(hw);
+
+ /* Calculate TUs per second */
+ tu_per_sec = cur_freq * clk_incval;
+
+ /* Calculate number of TUs to add for the fixed Rx latency. Since the
+ * latency measurement is in 1/100th of a nanosecond, we need to
+ * multiply by tu_per_sec and then divide by 1e11. This calculation
+ * overflows 64 bit integer arithmetic, so break it up into two
+ * divisions by 1e4 first then by 1e7.
+ */
+ fixed_offset = div_u64(tu_per_sec, 10000);
+ fixed_offset *= e822_vernier[link_spd].rx_fixed_delay;
+ fixed_offset = div_u64(fixed_offset, 10000000);
+
+ return fixed_offset;
+}
+
+/**
+ * ice_phy_cfg_rx_offset_e822 - Configure total Rx timestamp offset
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to configure
+ *
+ * Program the P_REG_TOTAL_RX_OFFSET register with the number of Time Units to
+ * adjust Rx timestamps by. This combines calculations from the Vernier offset
+ * measurements taken in hardware with some data about known fixed delay as
+ * well as adjusting for multi-lane alignment delay.
+ *
+ * This function must be called only after the offset registers are valid,
+ * i.e. after the Vernier calibration wait has passed, to ensure that the PHY
+ * has measured the offset.
+ *
+ * To avoid overflow, when calculating the offset based on the known static
+ * latency values, we use measurements in 1/100th of a nanosecond, and divide
+ * the TUs per second up front. This avoids overflow while allowing
+ * calculation of the adjustment using integer arithmetic.
+ */
+static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port)
+{
+ enum ice_ptp_link_spd link_spd;
+ enum ice_ptp_fec_mode fec_mode;
+ u64 total_offset, pmd, val;
+ int err;
+
+ err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
+ if (err)
+ return err;
+
+ total_offset = ice_calc_fixed_rx_offset_e822(hw, link_spd);
+
+ /* Read the first Vernier offset from the PHY register and add it to
+ * the total offset.
+ */
+ err = ice_read_64b_phy_reg_e822(hw, port,
+ P_REG_PAR_PCS_RX_OFFSET_L,
+ &val);
+ if (err)
+ return err;
+
+ total_offset += val;
+
+ /* For Rx, all multi-lane link speeds include a second Vernier
+ * calibration, because the lanes might not be aligned.
+ */
+ if (link_spd == ICE_PTP_LNK_SPD_40G ||
+ link_spd == ICE_PTP_LNK_SPD_50G ||
+ link_spd == ICE_PTP_LNK_SPD_50G_RS ||
+ link_spd == ICE_PTP_LNK_SPD_100G_RS) {
+ err = ice_read_64b_phy_reg_e822(hw, port,
+ P_REG_PAR_RX_TIME_L,
+ &val);
+ if (err)
+ return err;
+
+ total_offset += val;
+ }
+
+ /* In addition, Rx must account for the PMD alignment */
+ err = ice_phy_calc_pmd_adj_e822(hw, port, link_spd, fec_mode, &pmd);
+ if (err)
+ return err;
+
+ /* For RS-FEC, this adjustment adds delay, but for other modes, it
+ * subtracts delay.
+ */
+ if (fec_mode == ICE_PTP_FEC_MODE_RS_FEC)
+ total_offset += pmd;
+ else
+ total_offset -= pmd;
+
+ /* Now that the total offset has been calculated, program it to the
+ * PHY and indicate that the Rx offset is ready. After this,
+ * timestamps will be enabled.
+ */
+ err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_RX_OFFSET_L,
+ total_offset);
+ if (err)
+ return err;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 1);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_phy_cfg_fixed_rx_offset_e822 - Configure fixed Rx offset for bypass mode
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to configure
+ *
+ * Calculate and program the fixed Rx offset, and indicate that the offset is
+ * ready. This can be used when operating in bypass mode.
+ */
+static int
+ice_phy_cfg_fixed_rx_offset_e822(struct ice_hw *hw, u8 port)
+{
+ enum ice_ptp_link_spd link_spd;
+ enum ice_ptp_fec_mode fec_mode;
+ u64 total_offset;
+ int err;
+
+ err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
+ if (err)
+ return err;
+
+ total_offset = ice_calc_fixed_rx_offset_e822(hw, link_spd);
+
+ /* Program the fixed Rx offset into the P_REG_TOTAL_RX_OFFSET_L
+ * register, then indicate that the Rx offset is ready. After this,
+ * timestamps will be enabled.
+ *
+ * Note that this skips including the more precise offsets generated
+ * by Vernier calibration.
+ */
+ err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_RX_OFFSET_L,
+ total_offset);
+ if (err)
+ return err;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 1);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_read_phy_and_phc_time_e822 - Simultaneously capture PHC and PHY time
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to read
+ * @phy_time: on return, the 64bit PHY timer value
+ * @phc_time: on return, the lower 64bits of PHC time
+ *
+ * Issue a READ_TIME timer command to simultaneously capture the PHY and PHC
+ * timer values.
+ */
+static int
+ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time,
+ u64 *phc_time)
+{
+ u64 tx_time, rx_time;
+ u32 zo, lo;
+ u8 tmr_idx;
+ int err;
+
+ tmr_idx = ice_get_ptp_src_clock_index(hw);
+
+ /* Prepare the PHC timer for a READ_TIME capture command */
+ ice_ptp_src_cmd(hw, READ_TIME);
+
+ /* Prepare the PHY timer for a READ_TIME capture command */
+ err = ice_ptp_one_port_cmd(hw, port, READ_TIME);
+ if (err)
+ return err;
+
+ /* Issue the sync to start the READ_TIME capture */
+ ice_ptp_exec_tmr_cmd(hw);
+
+ /* Read the captured PHC time from the shadow time registers */
+ zo = rd32(hw, GLTSYN_SHTIME_0(tmr_idx));
+ lo = rd32(hw, GLTSYN_SHTIME_L(tmr_idx));
+ *phc_time = (u64)lo << 32 | zo;
+
+ /* Read the captured PHY time from the PHY shadow registers */
+ err = ice_ptp_read_port_capture(hw, port, &tx_time, &rx_time);
+ if (err)
+ return err;
+
+ /* If the PHY Tx and Rx timers don't match, log a warning message.
+ * Note that this should not happen in normal circumstances since the
+ * driver always programs them together.
+ */
+ if (tx_time != rx_time)
+ dev_warn(ice_hw_to_dev(hw),
+ "PHY port %u Tx and Rx timers do not match, tx_time 0x%016llX, rx_time 0x%016llX\n",
+ port, (unsigned long long)tx_time,
+ (unsigned long long)rx_time);
+
+ *phy_time = tx_time;
+
+ return 0;
+}
+
+/**
+ * ice_sync_phy_timer_e822 - Synchronize the PHY timer with PHC timer
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to synchronize
+ *
+ * Perform an adjustment to ensure that the PHY and PHC timers are in sync.
+ * This is done by issuing a READ_TIME command which triggers a simultaneous
+ * read of the PHY timer and PHC timer. Then we use the difference to
+ * calculate an appropriate 2s complement addition to add to the PHY timer in
+ * order to ensure it reads the same value as the primary PHC timer.
+ */
+static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port)
+{
+ u64 phc_time, phy_time, difference;
+ int err;
+
+ if (!ice_ptp_lock(hw)) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to acquire PTP semaphore\n");
+ return -EBUSY;
+ }
+
+ err = ice_read_phy_and_phc_time_e822(hw, port, &phy_time, &phc_time);
+ if (err)
+ goto err_unlock;
+
+ /* Calculate the amount required to add to the port time in order for
+ * it to match the PHC time.
+ *
+ * Note that the port adjustment is done using 2s complement
+ * arithmetic. This is convenient since it means that we can simply
+ * calculate the difference between the PHC time and the port time,
+ * and it will be interpreted correctly.
+ */
+ difference = phc_time - phy_time;
+
+ err = ice_ptp_prep_port_adj_e822(hw, port, (s64)difference);
+ if (err)
+ goto err_unlock;
+
+ err = ice_ptp_one_port_cmd(hw, port, ADJ_TIME);
+ if (err)
+ goto err_unlock;
+
+ /* Do not perform any action on the main timer */
+ ice_ptp_src_cmd(hw, ICE_PTP_NOP);
+
+ /* Issue the sync to activate the time adjustment */
+ ice_ptp_exec_tmr_cmd(hw);
+
+ /* Re-capture the timer values to flush the command registers and
+ * verify that the time was properly adjusted.
+ */
+ err = ice_read_phy_and_phc_time_e822(hw, port, &phy_time, &phc_time);
+ if (err)
+ goto err_unlock;
+
+ dev_info(ice_hw_to_dev(hw),
+ "Port %u PHY time synced to PHC: 0x%016llX, 0x%016llX\n",
+ port, (unsigned long long)phy_time,
+ (unsigned long long)phc_time);
+
+ ice_ptp_unlock(hw);
+
+ return 0;
+
+err_unlock:
+ ice_ptp_unlock(hw);
+ return err;
+}
+
+/**
+ * ice_stop_phy_timer_e822 - Stop the PHY clock timer
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to stop
+ * @soft_reset: if true, hold the SOFT_RESET bit of P_REG_PS
+ *
+ * Stop the clock of a PHY port. This must be done as part of the flow to
+ * re-calibrate Tx and Rx timestamping offsets whenever the clock time is
+ * initialized or when link speed changes.
+ */
+int
+ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset)
+{
+ int err;
+ u32 val;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 0);
+ if (err)
+ return err;
+
+ err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 0);
+ if (err)
+ return err;
+
+ err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val);
+ if (err)
+ return err;
+
+ val &= ~P_REG_PS_START_M;
+ err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ if (err)
+ return err;
+
+ val &= ~P_REG_PS_ENA_CLK_M;
+ err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ if (err)
+ return err;
+
+ if (soft_reset) {
+ val |= P_REG_PS_SFT_RESET_M;
+ err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ if (err)
+ return err;
+ }
+
+ ice_debug(hw, ICE_DBG_PTP, "Disabled clock on PHY port %u\n", port);
+
+ return 0;
+}
+
+/**
+ * ice_start_phy_timer_e822 - Start the PHY clock timer
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to start
+ * @bypass: if true, start the PHY in bypass mode
+ *
+ * Start the clock of a PHY port. This must be done as part of the flow to
+ * re-calibrate Tx and Rx timestamping offsets whenever the clock time is
+ * initialized or when link speed changes.
+ *
+ * Bypass mode enables timestamps immediately without waiting for Vernier
+ * calibration to complete. Hardware will still continue taking Vernier
+ * measurements on Tx or Rx of packets, but they will not be applied to
+ * timestamps. Use ice_phy_exit_bypass_e822 to exit bypass mode once hardware
+ * has completed offset calculation.
+ */
+int
+ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass)
+{
+ u32 lo, hi, val;
+ u64 incval;
+ u8 tmr_idx;
+ int err;
+
+ tmr_idx = ice_get_ptp_src_clock_index(hw);
+
+ err = ice_stop_phy_timer_e822(hw, port, false);
+ if (err)
+ return err;
+
+ ice_phy_cfg_lane_e822(hw, port);
+
+ err = ice_phy_cfg_uix_e822(hw, port);
+ if (err)
+ return err;
+
+ err = ice_phy_cfg_parpcs_e822(hw, port);
+ if (err)
+ return err;
+
+ lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx));
+ hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx));
+ incval = (u64)hi << 32 | lo;
+
+ err = ice_write_40b_phy_reg_e822(hw, port, P_REG_TIMETUS_L, incval);
+ if (err)
+ return err;
+
+ err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL);
+ if (err)
+ return err;
+
+ /* Do not perform any action on the main timer */
+ ice_ptp_src_cmd(hw, ICE_PTP_NOP);
+
+ ice_ptp_exec_tmr_cmd(hw);
+
+ err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val);
+ if (err)
+ return err;
+
+ val |= P_REG_PS_SFT_RESET_M;
+ err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ if (err)
+ return err;
+
+ val |= P_REG_PS_START_M;
+ err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ if (err)
+ return err;
+
+ val &= ~P_REG_PS_SFT_RESET_M;
+ err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ if (err)
+ return err;
+
+ err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL);
+ if (err)
+ return err;
+
+ ice_ptp_exec_tmr_cmd(hw);
+
+ val |= P_REG_PS_ENA_CLK_M;
+ err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ if (err)
+ return err;
+
+ val |= P_REG_PS_LOAD_OFFSET_M;
+ err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ if (err)
+ return err;
+
+ ice_ptp_exec_tmr_cmd(hw);
+
+ err = ice_sync_phy_timer_e822(hw, port);
+ if (err)
+ return err;
+
+ if (bypass) {
+ val |= P_REG_PS_BYPASS_MODE_M;
+ /* Enter BYPASS mode, enabling timestamps immediately. */
+ err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ if (err)
+ return err;
+
+ /* Program the fixed Tx offset */
+ err = ice_phy_cfg_fixed_tx_offset_e822(hw, port);
+ if (err)
+ return err;
+
+ /* Program the fixed Rx offset */
+ err = ice_phy_cfg_fixed_rx_offset_e822(hw, port);
+ if (err)
+ return err;
+ }
+
+ ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port);
+
+ return 0;
+}
+
+/**
+ * ice_phy_exit_bypass_e822 - Exit bypass mode, after vernier calculations
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to configure
+ *
+ * After hardware finishes vernier calculations for the Tx and Rx offset, this
+ * function can be used to exit bypass mode by updating the total Tx and Rx
+ * offsets, and then disabling bypass. This will enable hardware to include
+ * the more precise offset calibrations, increasing precision of the generated
+ * timestamps.
+ *
+ * This cannot be done until hardware has measured the offsets, which requires
+ * waiting until at least one packet has been sent and received by the device.
+ */
+int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port)
+{
+ int err;
+ u32 val;
+
+ err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+
+ if (!(val & P_REG_TX_OV_STATUS_OV_M)) {
+ ice_debug(hw, ICE_DBG_PTP, "Tx offset is not yet valid for port %u\n",
+ port);
+ return -EBUSY;
+ }
+
+ err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+
+ if (!(val & P_REG_TX_OV_STATUS_OV_M)) {
+ ice_debug(hw, ICE_DBG_PTP, "Rx offset is not yet valid for port %u\n",
+ port);
+ return -EBUSY;
+ }
+
+ err = ice_phy_cfg_tx_offset_e822(hw, port);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to program total Tx offset for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+
+ err = ice_phy_cfg_rx_offset_e822(hw, port);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to program total Rx offset for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+
+ /* Exit bypass mode now that the offset has been updated */
+ err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read P_REG_PS for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+
+ if (!(val & P_REG_PS_BYPASS_MODE_M))
+ ice_debug(hw, ICE_DBG_PTP, "Port %u not in bypass mode\n",
+ port);
+
+ val &= ~P_REG_PS_BYPASS_MODE_M;
+ err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to disable bypass for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+
+ dev_info(ice_hw_to_dev(hw), "Exiting bypass mode on PHY port %u\n",
+ port);
+
+ return 0;
+}
+
+/* E810 functions
+ *
+ * The following functions operate on the E810 series devices which use
+ * a separate external PHY.
+ */
+
+/**
+ * ice_read_phy_reg_e810 - Read register from external PHY on E810
+ * @hw: pointer to the HW struct
+ * @addr: the address to read from
+ * @val: On return, the value read from the PHY
+ *
+ * Read a register from the external PHY on the E810 device.
+ */
+static int ice_read_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 *val)
+{
+ struct ice_sbq_msg_input msg = {0};
+ int err;
+
+ msg.msg_addr_low = lower_16_bits(addr);
+ msg.msg_addr_high = upper_16_bits(addr);
+ msg.opcode = ice_sbq_msg_rd;
+ msg.dest_dev = rmn_0;
+
+ err = ice_sbq_rw_reg(hw, &msg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+ err);
+ return err;
+ }
+
+ *val = msg.data;
+
+ return 0;
+}
+
+/**
+ * ice_write_phy_reg_e810 - Write register on external PHY on E810
+ * @hw: pointer to the HW struct
+ * @addr: the address to write to
+ * @val: the value to write to the PHY
+ *
+ * Write a value to a register of the external PHY on the E810 device.
+ */
+static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val)
+{
+ struct ice_sbq_msg_input msg = {0};
+ int err;
+
+ msg.msg_addr_low = lower_16_bits(addr);
+ msg.msg_addr_high = upper_16_bits(addr);
+ msg.opcode = ice_sbq_msg_wr;
+ msg.dest_dev = rmn_0;
+ msg.data = val;
+
+ err = ice_sbq_rw_reg(hw, &msg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_read_phy_tstamp_ll_e810 - Read PHY timestamp registers through the FW
+ * @hw: pointer to the HW struct
+ * @idx: the timestamp index to read
+ * @hi: 8 bit timestamp high value
+ * @lo: 32 bit timestamp low value
+ *
+ * Read an 8-bit timestamp high value and a 32-bit timestamp low value out of the
+ * timestamp block of the external PHY on the E810 device using the low latency
+ * timestamp read.
+ */
+static int
+ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo)
+{
+ u32 val;
+ u8 i;
+
+ /* Write TS index to read to the PF register so the FW can read it */
+ val = FIELD_PREP(TS_LL_READ_TS_IDX, idx) | TS_LL_READ_TS;
+ wr32(hw, PF_SB_ATQBAL, val);
+
+ /* Read the register repeatedly until the FW provides us the TS */
+ for (i = TS_LL_READ_RETRIES; i > 0; i--) {
+ val = rd32(hw, PF_SB_ATQBAL);
+
+ /* When the bit is cleared, the TS is ready in the register */
+ if (!(FIELD_GET(TS_LL_READ_TS, val))) {
+ /* High 8 bit value of the TS is on the bits 16:23 */
+ *hi = FIELD_GET(TS_LL_READ_TS_HIGH, val);
+
+ /* Read the low 32 bit value and set the TS valid bit */
+ *lo = rd32(hw, PF_SB_ATQBAH) | TS_VALID;
+ return 0;
+ }
+
+ udelay(10);
+ }
+
+ /* FW failed to provide the TS in time */
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read PTP timestamp using low latency read\n");
+ return -EINVAL;
+}
+
+/**
+ * ice_read_phy_tstamp_sbq_e810 - Read PHY timestamp registers through the sbq
+ * @hw: pointer to the HW struct
+ * @lport: the lport to read from
+ * @idx: the timestamp index to read
+ * @hi: 8 bit timestamp high value
+ * @lo: 32 bit timestamp low value
+ *
+ * Read an 8-bit timestamp high value and a 32-bit timestamp low value out of the
+ * timestamp block of the external PHY on the E810 device using sideband queue.
+ */
+static int
+ice_read_phy_tstamp_sbq_e810(struct ice_hw *hw, u8 lport, u8 idx, u8 *hi,
+ u32 *lo)
+{
+ u32 hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx);
+ u32 lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx);
+ u32 lo_val, hi_val;
+ int err;
+
+ err = ice_read_phy_reg_e810(hw, lo_addr, &lo_val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read low PTP timestamp register, err %d\n",
+ err);
+ return err;
+ }
+
+ err = ice_read_phy_reg_e810(hw, hi_addr, &hi_val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read high PTP timestamp register, err %d\n",
+ err);
+ return err;
+ }
+
+ *lo = lo_val;
+ *hi = (u8)hi_val;
+
+ return 0;
+}
+
+/**
+ * ice_read_phy_tstamp_e810 - Read a PHY timestamp out of the external PHY
+ * @hw: pointer to the HW struct
+ * @lport: the lport to read from
+ * @idx: the timestamp index to read
+ * @tstamp: on return, the 40bit timestamp value
+ *
+ * Read a 40bit timestamp value out of the timestamp block of the external PHY
+ * on the E810 device.
+ */
+static int
+ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp)
+{
+ u32 lo = 0;
+ u8 hi = 0;
+ int err;
+
+ if (hw->dev_caps.ts_dev_info.ts_ll_read)
+ err = ice_read_phy_tstamp_ll_e810(hw, idx, &hi, &lo);
+ else
+ err = ice_read_phy_tstamp_sbq_e810(hw, lport, idx, &hi, &lo);
+
+ if (err)
+ return err;
+
+ /* For E810 devices, the timestamp is reported with the lower 32 bits
+ * in the low register, and the upper 8 bits in the high register.
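+	 *
+	 * For example, assuming TS_HIGH_S is 32 for this layout, hi of
+	 * 0x12 and lo of 0x34567890 combine into 0x1234567890.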
+ */
+ *tstamp = ((u64)hi) << TS_HIGH_S | ((u64)lo & TS_LOW_M);
+
+ return 0;
+}
+
+/**
+ * ice_clear_phy_tstamp_e810 - Clear a timestamp from the external PHY
+ * @hw: pointer to the HW struct
+ * @lport: the lport to read from
+ * @idx: the timestamp index to reset
+ *
+ * Clear a timestamp, resetting its valid bit, from the timestamp block of the
+ * external PHY on the E810 device.
+ */
+static int ice_clear_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx)
+{
+ u32 lo_addr, hi_addr;
+ int err;
+
+ lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx);
+ hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx);
+
+ err = ice_write_phy_reg_e810(hw, lo_addr, 0);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n",
+ err);
+ return err;
+ }
+
+ err = ice_write_phy_reg_e810(hw, hi_addr, 0);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_init_phy_e810 - Enable PTP function on the external PHY
+ * @hw: pointer to HW struct
+ *
+ * Enable the timesync PTP functionality for the external PHY connected to
+ * this function.
+ */
+int ice_ptp_init_phy_e810(struct ice_hw *hw)
+{
+ u8 tmr_idx;
+ int err;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+ err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_ENA(tmr_idx),
+ GLTSYN_ENA_TSYN_ENA_M);
+ if (err)
+ ice_debug(hw, ICE_DBG_PTP, "PTP failed in ena_phy_time_syn %d\n",
+ err);
+
+ return err;
+}
+
+/**
+ * ice_ptp_init_phc_e810 - Perform E810 specific PHC initialization
+ * @hw: pointer to HW struct
+ *
+ * Perform E810-specific PTP hardware clock initialization steps.
+ */
+static int ice_ptp_init_phc_e810(struct ice_hw *hw)
+{
+ /* Ensure synchronization delay is zero */
+ wr32(hw, GLTSYN_SYNC_DLAY, 0);
+
+ /* Initialize the PHY */
+ return ice_ptp_init_phy_e810(hw);
+}
+
+/**
+ * ice_ptp_prep_phy_time_e810 - Prepare PHY port with initial time
+ * @hw: pointer to HW struct
+ * @time: Time to initialize the PHY port clock to
+ *
+ * Program the PHY port ETH_GLTSYN_SHTIME registers in preparation for
+ * setting the initial clock time. The time will not actually be programmed
+ * until the driver issues an INIT_TIME command.
+ *
+ * The time value is the upper 32 bits of the PHY timer, usually in units of
+ * nominal nanoseconds.
+ */
+static int ice_ptp_prep_phy_time_e810(struct ice_hw *hw, u32 time)
+{
+ u8 tmr_idx;
+ int err;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+ err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_0(tmr_idx), 0);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_0, err %d\n",
+ err);
+ return err;
+ }
+
+ err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_L(tmr_idx), time);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_L, err %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_prep_phy_adj_e810 - Prep PHY port for a time adjustment
+ * @hw: pointer to HW struct
+ * @adj: adjustment value to program
+ *
+ * Prepare the PHY port for an atomic adjustment by programming the PHY
+ * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual adjustment
+ * is completed by issuing an ADJ_TIME sync command.
+ *
+ * The adjustment value only contains the portion used for the upper 32bits of
+ * the PHY timer, usually in units of nominal nanoseconds. Negative
+ * adjustments are supported using 2s complement arithmetic.
+ */
+static int ice_ptp_prep_phy_adj_e810(struct ice_hw *hw, s32 adj)
+{
+ u8 tmr_idx;
+ int err;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+ /* Adjustments are represented as signed 2's complement values in
+ * nanoseconds. Sub-nanosecond adjustment is not supported.
+ */
+ err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), 0);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_L, err %d\n",
+ err);
+ return err;
+ }
+
+ err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), adj);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_H, err %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_prep_phy_incval_e810 - Prep PHY port increment value change
+ * @hw: pointer to HW struct
+ * @incval: The new 40bit increment value to prepare
+ *
+ * Prepare the PHY port for a new increment value by programming the PHY
+ * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual change is
+ * completed by issuing an INIT_INCVAL command.
+ */
+static int ice_ptp_prep_phy_incval_e810(struct ice_hw *hw, u64 incval)
+{
+ u32 high, low;
+ u8 tmr_idx;
+ int err;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+ low = lower_32_bits(incval);
+ high = upper_32_bits(incval);
+
+ err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), low);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write incval to PHY SHADJ_L, err %d\n",
+ err);
+ return err;
+ }
+
+ err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), high);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write incval PHY SHADJ_H, err %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_ptp_port_cmd_e810 - Prepare all external PHYs for a timer command
+ * @hw: pointer to HW struct
+ * @cmd: Command to be sent to the port
+ *
+ * Prepare the external PHYs connected to this device for a timer sync
+ * command.
+ */
+static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+ u32 cmd_val, val;
+ int err;
+
+ switch (cmd) {
+ case INIT_TIME:
+ cmd_val = GLTSYN_CMD_INIT_TIME;
+ break;
+ case INIT_INCVAL:
+ cmd_val = GLTSYN_CMD_INIT_INCVAL;
+ break;
+ case ADJ_TIME:
+ cmd_val = GLTSYN_CMD_ADJ_TIME;
+ break;
+ case READ_TIME:
+ cmd_val = GLTSYN_CMD_READ_TIME;
+ break;
+ case ADJ_TIME_AT_TIME:
+ cmd_val = GLTSYN_CMD_ADJ_INIT_TIME;
+ break;
+ case ICE_PTP_NOP:
+ return 0;
+ }
+
+ /* Read, modify, write */
+ err = ice_read_phy_reg_e810(hw, ETH_GLTSYN_CMD, &val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read GLTSYN_CMD, err %d\n", err);
+ return err;
+ }
+
+ /* Modify necessary bits only and perform write */
+ val &= ~TS_CMD_MASK_E810;
+ val |= cmd_val;
+
+ err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_CMD, val);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to write back GLTSYN_CMD, err %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+/* Device agnostic functions
+ *
+ * The following functions implement shared behavior common to both E822 and
+ * E810 devices, possibly calling a device specific implementation where
+ * necessary.
+ */
+
+/**
+ * ice_ptp_lock - Acquire PTP global semaphore register lock
+ * @hw: pointer to the HW struct
+ *
+ * Acquire the global PTP hardware semaphore lock. Returns true if the lock
+ * was acquired, false otherwise.
+ *
+ * The PFTSYN_SEM register sets the busy bit on read, returning the previous
+ * value. If software sees the busy bit cleared, this means that this function
+ * acquired the lock (and the busy bit is now set). If software sees the busy
+ * bit set, it means that another function acquired the lock.
+ *
+ * Software must clear the busy bit with a write to release the lock for other
+ * functions when done.
+ */
+bool ice_ptp_lock(struct ice_hw *hw)
+{
+ u32 hw_lock;
+ int i;
+
+#define MAX_TRIES 5
+
+ for (i = 0; i < MAX_TRIES; i++) {
+ hw_lock = rd32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+ hw_lock = hw_lock & PFTSYN_SEM_BUSY_M;
+ if (!hw_lock)
+ break;
+
+ /* Somebody is holding the lock */
+ usleep_range(10000, 20000);
+ }
+
+ return !hw_lock;
+}
+
+/**
+ * ice_ptp_unlock - Release PTP global semaphore register lock
+ * @hw: pointer to the HW struct
+ *
+ * Release the global PTP hardware semaphore lock. This is done by writing to
+ * the PFTSYN_SEM register.
+ */
+void ice_ptp_unlock(struct ice_hw *hw)
+{
+ wr32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), 0);
+}
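+
+/* Typical usage of the lock pair (see ice_ptp_write_incval_locked() below
+ * for the canonical in-driver pattern):
+ *
+ *	if (!ice_ptp_lock(hw))
+ *		return -EBUSY;
+ *	... program shadow registers and issue a timer command ...
+ *	ice_ptp_unlock(hw);
+ */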
+
+/**
+ * ice_ptp_tmr_cmd - Prepare and trigger a timer sync command
+ * @hw: pointer to HW struct
+ * @cmd: the command to issue
+ *
+ * Prepare the source timer and PHY timers and then trigger the requested
+ * command. This causes the shadow registers previously written in preparation
+ * for the command to be synchronously applied to both the source and PHY
+ * timers.
+ */
+static int ice_ptp_tmr_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+ int err;
+
+ /* First, prepare the source timer */
+ ice_ptp_src_cmd(hw, cmd);
+
+ /* Next, prepare the ports */
+ if (ice_is_e810(hw))
+ err = ice_ptp_port_cmd_e810(hw, cmd);
+ else
+ err = ice_ptp_port_cmd_e822(hw, cmd);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to prepare PHY ports for timer command %u, err %d\n",
+ cmd, err);
+ return err;
+ }
+
+ /* Write the sync command register to drive both source and PHY timer
+ * commands synchronously
+ */
+ ice_ptp_exec_tmr_cmd(hw);
+
+ return 0;
+}
+
+/**
+ * ice_ptp_init_time - Initialize device time to provided value
+ * @hw: pointer to HW struct
+ * @time: 64bits of time (GLTSYN_TIME_L and GLTSYN_TIME_H)
+ *
+ * Initialize the device to the specified time provided. This requires a three
+ * step process:
+ *
+ * 1) write the new init time to the source timer shadow registers
+ * 2) write the new init time to the PHY timer shadow registers
+ * 3) issue an init_time timer command to synchronously switch both the source
+ * and port timers to the new init time value at the next clock cycle.
+ */
+int ice_ptp_init_time(struct ice_hw *hw, u64 time)
+{
+ u8 tmr_idx;
+ int err;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+ /* Source timers */
+ wr32(hw, GLTSYN_SHTIME_L(tmr_idx), lower_32_bits(time));
+ wr32(hw, GLTSYN_SHTIME_H(tmr_idx), upper_32_bits(time));
+ wr32(hw, GLTSYN_SHTIME_0(tmr_idx), 0);
+
+ /* PHY timers */
+ /* Fill Rx and Tx ports and send msg to PHY */
+ if (ice_is_e810(hw))
+ err = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF);
+ else
+ err = ice_ptp_prep_phy_time_e822(hw, time & 0xFFFFFFFF);
+ if (err)
+ return err;
+
+ return ice_ptp_tmr_cmd(hw, INIT_TIME);
+}
+
+/**
+ * ice_ptp_write_incval - Program PHC with new increment value
+ * @hw: pointer to HW struct
+ * @incval: Source timer increment value per clock cycle
+ *
+ * Program the PHC with a new increment value. This requires a three-step
+ * process:
+ *
+ * 1) Write the increment value to the source timer shadow registers
+ * 2) Write the increment value to the PHY timer shadow registers
+ * 3) Issue an INIT_INCVAL timer command to synchronously switch both the
+ * source and port timers to the new increment value at the next clock
+ * cycle.
+ */
+int ice_ptp_write_incval(struct ice_hw *hw, u64 incval)
+{
+ u8 tmr_idx;
+ int err;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+ /* Shadow Adjust */
+ wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval));
+ wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval));
+
+ if (ice_is_e810(hw))
+ err = ice_ptp_prep_phy_incval_e810(hw, incval);
+ else
+ err = ice_ptp_prep_phy_incval_e822(hw, incval);
+ if (err)
+ return err;
+
+ return ice_ptp_tmr_cmd(hw, INIT_INCVAL);
+}
+
+/**
+ * ice_ptp_write_incval_locked - Program new incval while holding semaphore
+ * @hw: pointer to HW struct
+ * @incval: Source timer increment value per clock cycle
+ *
+ * Program a new PHC incval while holding the PTP semaphore.
+ */
+int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval)
+{
+ int err;
+
+ if (!ice_ptp_lock(hw))
+ return -EBUSY;
+
+ err = ice_ptp_write_incval(hw, incval);
+
+ ice_ptp_unlock(hw);
+
+ return err;
+}
+
+/**
+ * ice_ptp_adj_clock - Adjust PHC clock time atomically
+ * @hw: pointer to HW struct
+ * @adj: Adjustment in nanoseconds
+ *
+ * Perform an atomic adjustment of the PHC time by the specified number of
+ * nanoseconds. This requires a three-step process:
+ *
+ * 1) Write the adjustment to the source timer shadow registers
+ * 2) Write the adjustment to the PHY timer shadow registers
+ * 3) Issue an ADJ_TIME timer command to synchronously apply the adjustment to
+ * both the source and port timers at the next clock cycle.
+ */
+int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
+{
+ u8 tmr_idx;
+ int err;
+
+ tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+ /* Write the desired clock adjustment into the GLTSYN_SHADJ register.
+ * For an ADJ_TIME command, this set of registers represents the value
+ * to add to the clock time. It supports subtraction by interpreting
+ * the value as a 2's complement integer.
+ */
+ wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0);
+ wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj);
+
+ if (ice_is_e810(hw))
+ err = ice_ptp_prep_phy_adj_e810(hw, adj);
+ else
+ err = ice_ptp_prep_phy_adj_e822(hw, adj);
+ if (err)
+ return err;
+
+ return ice_ptp_tmr_cmd(hw, ADJ_TIME);
+}
+
+/**
+ * ice_read_phy_tstamp - Read a PHY timestamp from the timestamp block
+ * @hw: pointer to the HW struct
+ * @block: the block to read from
+ * @idx: the timestamp index to read
+ * @tstamp: on return, the 40bit timestamp value
+ *
+ * Read a 40bit timestamp value out of the timestamp block. For E822 devices,
+ * the block is the quad to read from. For E810 devices, the block is the
+ * logical port to read from.
+ */
+int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp)
+{
+ if (ice_is_e810(hw))
+ return ice_read_phy_tstamp_e810(hw, block, idx, tstamp);
+ else
+ return ice_read_phy_tstamp_e822(hw, block, idx, tstamp);
+}
+
+/**
+ * ice_clear_phy_tstamp - Clear a timestamp from the timestamp block
+ * @hw: pointer to the HW struct
+ * @block: the block to read from
+ * @idx: the timestamp index to reset
+ *
+ * Clear a timestamp, resetting its valid bit, from the timestamp block. For
+ * E822 devices, the block is the quad to clear from. For E810 devices, the
+ * block is the logical port to clear from.
+ */
+int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx)
+{
+ if (ice_is_e810(hw))
+ return ice_clear_phy_tstamp_e810(hw, block, idx);
+ else
+ return ice_clear_phy_tstamp_e822(hw, block, idx);
+}
+
+/* E810T SMA functions
+ *
+ * The following functions operate specifically on E810T hardware and are used
+ * to access the extended GPIOs available.
+ */
+
+/**
+ * ice_get_pca9575_handle
+ * @hw: pointer to the hw struct
+ * @pca9575_handle: GPIO controller's handle
+ *
+ * Find and return the GPIO controller's handle in the netlist. Once found,
+ * the handle is cached in the HW structure and subsequent calls return the
+ * cached value.
+ */
+static int
+ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle)
+{
+ struct ice_aqc_get_link_topo *cmd;
+ struct ice_aq_desc desc;
+ int status;
+ u8 idx;
+
+ /* If the handle was read previously, return the cached value */
+ if (hw->io_expander_handle) {
+ *pca9575_handle = hw->io_expander_handle;
+ return 0;
+ }
+
+ /* If the handle has not been cached yet, read it from the netlist */
+ cmd = &desc.params.get_link_topo;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
+
+ /* Set node type to GPIO controller */
+ cmd->addr.topo_params.node_type_ctx =
+ (ICE_AQC_LINK_TOPO_NODE_TYPE_M &
+ ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL);
+
+#define SW_PCA9575_SFP_TOPO_IDX 2
+#define SW_PCA9575_QSFP_TOPO_IDX 1
+
+ /* Check if the SW IO expander controlling SMA exists in the netlist. */
+ if (hw->device_id == ICE_DEV_ID_E810C_SFP)
+ idx = SW_PCA9575_SFP_TOPO_IDX;
+ else if (hw->device_id == ICE_DEV_ID_E810C_QSFP)
+ idx = SW_PCA9575_QSFP_TOPO_IDX;
+ else
+ return -EOPNOTSUPP;
+
+ cmd->addr.topo_params.index = idx;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ if (status)
+ return -EOPNOTSUPP;
+
+ /* Verify if we found the right IO expander type */
+ if (desc.params.get_link_topo.node_part_num !=
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575)
+ return -EOPNOTSUPP;
+
+ /* If present save the handle and return it */
+ hw->io_expander_handle =
+ le16_to_cpu(desc.params.get_link_topo.addr.handle);
+ *pca9575_handle = hw->io_expander_handle;
+
+ return 0;
+}
+
+/**
+ * ice_read_sma_ctrl_e810t
+ * @hw: pointer to the hw struct
+ * @data: pointer to data to be read from the GPIO controller
+ *
+ * Read the SMA controller state. It is connected to pins 3-7 of Port 1 of the
+ * PCA9575 expander, so only bits 3-7 in data are valid.
+ */
+int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data)
+{
+ int status;
+ u16 handle;
+ u8 i;
+
+ status = ice_get_pca9575_handle(hw, &handle);
+ if (status)
+ return status;
+
+ *data = 0;
+
+ for (i = ICE_SMA_MIN_BIT_E810T; i <= ICE_SMA_MAX_BIT_E810T; i++) {
+ bool pin;
+
+ status = ice_aq_get_gpio(hw, handle, i + ICE_PCA9575_P1_OFFSET,
+ &pin, NULL);
+ if (status)
+ break;
+ *data |= (u8)(!pin) << i;
+ }
+
+ return status;
+}
+
+/**
+ * ice_write_sma_ctrl_e810t
+ * @hw: pointer to the hw struct
+ * @data: data to be written to the GPIO controller
+ *
+ * Write the data to the SMA controller. It is connected to pins 3-7 of Port 1
+ * of the PCA9575 expander, so only bits 3-7 in data are valid.
+ */
+int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data)
+{
+ int status;
+ u16 handle;
+ u8 i;
+
+ status = ice_get_pca9575_handle(hw, &handle);
+ if (status)
+ return status;
+
+ for (i = ICE_SMA_MIN_BIT_E810T; i <= ICE_SMA_MAX_BIT_E810T; i++) {
+ bool pin;
+
+ pin = !(data & (1 << i));
+ status = ice_aq_set_gpio(hw, handle, i + ICE_PCA9575_P1_OFFSET,
+ pin, NULL);
+ if (status)
+ break;
+ }
+
+ return status;
+}
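+
+/* Note (illustrative, inferred from the inversion in the helpers above): the
+ * SMA control pins are treated as active-low. For example, setting
+ * ICE_SMA1_TX_EN_E810T (bit 5) in the data byte drives Port 1 pin 5 of the
+ * expander low, and a low pin reads back as a set bit in the data byte.
+ */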
+
+/**
+ * ice_read_pca9575_reg_e810t
+ * @hw: pointer to the hw struct
+ * @offset: GPIO controller register offset
+ * @data: pointer to data to be read from the GPIO controller
+ *
+ * Read the register from the GPIO controller
+ */
+int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data)
+{
+ struct ice_aqc_link_topo_addr link_topo;
+ __le16 addr;
+ u16 handle;
+ int err;
+
+ memset(&link_topo, 0, sizeof(link_topo));
+
+ err = ice_get_pca9575_handle(hw, &handle);
+ if (err)
+ return err;
+
+ link_topo.handle = cpu_to_le16(handle);
+ link_topo.topo_params.node_type_ctx =
+ FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M,
+ ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED);
+
+ addr = cpu_to_le16((u16)offset);
+
+ return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL);
+}
+
+/**
+ * ice_is_pca9575_present
+ * @hw: pointer to the hw struct
+ *
+ * Check if the SW IO expander is present in the netlist
+ */
+bool ice_is_pca9575_present(struct ice_hw *hw)
+{
+ u16 handle = 0;
+ int status;
+
+ if (!ice_is_e810t(hw))
+ return false;
+
+ status = ice_get_pca9575_handle(hw, &handle);
+
+ return !status && handle;
+}
+
+/**
+ * ice_ptp_init_phc - Initialize PTP hardware clock
+ * @hw: pointer to the HW struct
+ *
+ * Perform the steps required to initialize the PTP hardware clock.
+ */
+int ice_ptp_init_phc(struct ice_hw *hw)
+{
+ u8 src_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+ /* Enable source clocks */
+ wr32(hw, GLTSYN_ENA(src_idx), GLTSYN_ENA_TSYN_ENA_M);
+
+ /* Clear event err indications for auxiliary pins */
+ (void)rd32(hw, GLTSYN_STAT(src_idx));
+
+ if (ice_is_e810(hw))
+ return ice_ptp_init_phc_e810(hw);
+ else
+ return ice_ptp_init_phc_e822(hw);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
new file mode 100644
index 000000000..071f545aa
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -0,0 +1,461 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_PTP_HW_H_
+#define _ICE_PTP_HW_H_
+
+enum ice_ptp_tmr_cmd {
+ INIT_TIME,
+ INIT_INCVAL,
+ ADJ_TIME,
+ ADJ_TIME_AT_TIME,
+ READ_TIME,
+ ICE_PTP_NOP,
+};
+
+enum ice_ptp_serdes {
+ ICE_PTP_SERDES_1G,
+ ICE_PTP_SERDES_10G,
+ ICE_PTP_SERDES_25G,
+ ICE_PTP_SERDES_40G,
+ ICE_PTP_SERDES_50G,
+ ICE_PTP_SERDES_100G
+};
+
+enum ice_ptp_link_spd {
+ ICE_PTP_LNK_SPD_1G,
+ ICE_PTP_LNK_SPD_10G,
+ ICE_PTP_LNK_SPD_25G,
+ ICE_PTP_LNK_SPD_25G_RS,
+ ICE_PTP_LNK_SPD_40G,
+ ICE_PTP_LNK_SPD_50G,
+ ICE_PTP_LNK_SPD_50G_RS,
+ ICE_PTP_LNK_SPD_100G_RS,
+ NUM_ICE_PTP_LNK_SPD /* Must be last */
+};
+
+enum ice_ptp_fec_mode {
+ ICE_PTP_FEC_MODE_NONE,
+ ICE_PTP_FEC_MODE_CLAUSE74,
+ ICE_PTP_FEC_MODE_RS_FEC
+};
+
+/**
+ * struct ice_time_ref_info_e822
+ * @pll_freq: Frequency, in Hz, of the PLL that drives the timer ticks
+ * @nominal_incval: increment to generate nanoseconds in GLTSYN_TIME_L
+ * @pps_delay: propagation delay of the PPS output signal
+ *
+ * Characteristic information for the various TIME_REF sources possible in the
+ * E822 devices
+ */
+struct ice_time_ref_info_e822 {
+ u64 pll_freq;
+ u64 nominal_incval;
+ u8 pps_delay;
+};
+
+/**
+ * struct ice_vernier_info_e822
+ * @tx_par_clk: Frequency used to calculate P_REG_PAR_TX_TUS
+ * @rx_par_clk: Frequency used to calculate P_REG_PAR_RX_TUS
+ * @tx_pcs_clk: Frequency used to calculate P_REG_PCS_TX_TUS
+ * @rx_pcs_clk: Frequency used to calculate P_REG_PCS_RX_TUS
+ * @tx_desk_rsgb_par: Frequency used to calculate P_REG_DESK_PAR_TX_TUS
+ * @rx_desk_rsgb_par: Frequency used to calculate P_REG_DESK_PAR_RX_TUS
+ * @tx_desk_rsgb_pcs: Frequency used to calculate P_REG_DESK_PCS_TX_TUS
+ * @rx_desk_rsgb_pcs: Frequency used to calculate P_REG_DESK_PCS_RX_TUS
+ * @tx_fixed_delay: Fixed Tx latency measured in 1/100th nanoseconds
+ * @pmd_adj_divisor: Divisor used to calculate PMD alignment adjustment
+ * @rx_fixed_delay: Fixed Rx latency measured in 1/100th nanoseconds
+ *
+ * Table of constants used as part of the Vernier calibration of the Tx
+ * and Rx timestamps. This includes frequency values used to compute TUs per
+ * PAR/PCS clock cycle, and static delay values measured during hardware
+ * design.
+ *
+ * Note that some values are not used for all link speeds, and the
+ * P_REG_DESK_PAR* registers may represent different clock markers at
+ * different link speeds, either the deskew marker for multi-lane link speeds
+ * or the Reed Solomon gearbox marker for RS-FEC.
+ */
+struct ice_vernier_info_e822 {
+ u32 tx_par_clk;
+ u32 rx_par_clk;
+ u32 tx_pcs_clk;
+ u32 rx_pcs_clk;
+ u32 tx_desk_rsgb_par;
+ u32 rx_desk_rsgb_par;
+ u32 tx_desk_rsgb_pcs;
+ u32 rx_desk_rsgb_pcs;
+ u32 tx_fixed_delay;
+ u32 pmd_adj_divisor;
+ u32 rx_fixed_delay;
+};
+
+/**
+ * struct ice_cgu_pll_params_e822
+ * @refclk_pre_div: Reference clock pre-divisor
+ * @feedback_div: Feedback divisor
+ * @frac_n_div: Fractional divisor
+ * @post_pll_div: Post PLL divisor
+ *
+ * Clock Generation Unit parameters used to program the PLL based on the
+ * selected TIME_REF frequency.
+ */
+struct ice_cgu_pll_params_e822 {
+ u32 refclk_pre_div;
+ u32 feedback_div;
+ u32 frac_n_div;
+ u32 post_pll_div;
+};
+
+extern const struct
+ice_cgu_pll_params_e822 e822_cgu_params[NUM_ICE_TIME_REF_FREQ];
+
+/* Table of constants related to possible TIME_REF sources */
+extern const struct ice_time_ref_info_e822 e822_time_ref[NUM_ICE_TIME_REF_FREQ];
+
+/* Table of constants for Vernier calibration on E822 */
+extern const struct ice_vernier_info_e822 e822_vernier[NUM_ICE_PTP_LNK_SPD];
+
+/* Increment value to generate nanoseconds in the GLTSYN_TIME_L register for
+ * the E810 devices. Based off of a PLL with an 812.5 MHz frequency.
+ */
+#define ICE_PTP_NOMINAL_INCVAL_E810 0x13b13b13bULL
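+
+/* Illustrative derivation (assuming the 812.5 MHz PLL noted above): the
+ * incval is the clock period in nanoseconds expressed as a 32.32 fixed
+ * point value. One 812.5 MHz cycle lasts 16/13 ns, and
+ *
+ *	(16 / 13) * 2^32 ~= 5286113595 = 0x13B13B13B
+ *
+ * which matches ICE_PTP_NOMINAL_INCVAL_E810.
+ */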
+
+/* Device agnostic functions */
+u8 ice_get_ptp_src_clock_index(struct ice_hw *hw);
+bool ice_ptp_lock(struct ice_hw *hw);
+void ice_ptp_unlock(struct ice_hw *hw);
+int ice_ptp_init_time(struct ice_hw *hw, u64 time);
+int ice_ptp_write_incval(struct ice_hw *hw, u64 incval);
+int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval);
+int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj);
+int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp);
+int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx);
+int ice_ptp_init_phc(struct ice_hw *hw);
+
+/* E822 family functions */
+int ice_read_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 *val);
+int ice_write_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 val);
+int ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val);
+int ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val);
+int ice_ptp_prep_port_adj_e822(struct ice_hw *hw, u8 port, s64 time);
+
+/**
+ * ice_e822_time_ref - Get the current TIME_REF from capabilities
+ * @hw: pointer to the HW structure
+ *
+ * Returns the current TIME_REF from the capabilities structure.
+ */
+static inline enum ice_time_ref_freq ice_e822_time_ref(struct ice_hw *hw)
+{
+ return hw->func_caps.ts_func_info.time_ref;
+}
+
+/**
+ * ice_set_e822_time_ref - Set new TIME_REF
+ * @hw: pointer to the HW structure
+ * @time_ref: new TIME_REF to set
+ *
+ * Update the TIME_REF in the capabilities structure in response to some
+ * change, such as an update to the CGU registers.
+ */
+static inline void
+ice_set_e822_time_ref(struct ice_hw *hw, enum ice_time_ref_freq time_ref)
+{
+ hw->func_caps.ts_func_info.time_ref = time_ref;
+}
+
+static inline u64 ice_e822_pll_freq(enum ice_time_ref_freq time_ref)
+{
+ return e822_time_ref[time_ref].pll_freq;
+}
+
+static inline u64 ice_e822_nominal_incval(enum ice_time_ref_freq time_ref)
+{
+ return e822_time_ref[time_ref].nominal_incval;
+}
+
+static inline u64 ice_e822_pps_delay(enum ice_time_ref_freq time_ref)
+{
+ return e822_time_ref[time_ref].pps_delay;
+}
+
+/* E822 Vernier calibration functions */
+int ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset);
+int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass);
+int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port);
+
+/* E810 family functions */
+int ice_ptp_init_phy_e810(struct ice_hw *hw);
+int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data);
+int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data);
+int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data);
+bool ice_is_pca9575_present(struct ice_hw *hw);
+
+#define PFTSYN_SEM_BYTES 4
+
+#define ICE_PTP_CLOCK_INDEX_0 0x00
+#define ICE_PTP_CLOCK_INDEX_1 0x01
+
+/* PHY timer commands */
+#define SEL_CPK_SRC 8
+#define SEL_PHY_SRC 3
+
+/* Time Sync command Definitions */
+#define GLTSYN_CMD_INIT_TIME BIT(0)
+#define GLTSYN_CMD_INIT_INCVAL BIT(1)
+#define GLTSYN_CMD_INIT_TIME_INCVAL (BIT(0) | BIT(1))
+#define GLTSYN_CMD_ADJ_TIME BIT(2)
+#define GLTSYN_CMD_ADJ_INIT_TIME (BIT(2) | BIT(3))
+#define GLTSYN_CMD_READ_TIME BIT(7)
+
+/* PHY port Time Sync command definitions */
+#define PHY_CMD_INIT_TIME BIT(0)
+#define PHY_CMD_INIT_INCVAL BIT(1)
+#define PHY_CMD_ADJ_TIME (BIT(0) | BIT(1))
+#define PHY_CMD_ADJ_TIME_AT_TIME (BIT(0) | BIT(2))
+#define PHY_CMD_READ_TIME (BIT(0) | BIT(1) | BIT(2))
+
+#define TS_CMD_MASK_E810 0xFF
+#define TS_CMD_MASK 0xF
+#define SYNC_EXEC_CMD 0x3
+
+/* Macros to derive port low and high addresses on both quads */
+#define P_Q0_L(a, p) ((((a) + (0x2000 * (p)))) & 0xFFFF)
+#define P_Q0_H(a, p) ((((a) + (0x2000 * (p)))) >> 16)
+#define P_Q1_L(a, p) ((((a) - (0x2000 * ((p) - ICE_PORTS_PER_QUAD)))) & 0xFFFF)
+#define P_Q1_H(a, p) ((((a) - (0x2000 * ((p) - ICE_PORTS_PER_QUAD)))) >> 16)
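+
+/* Worked example (illustrative): port register windows repeat every 0x2000
+ * bytes within a quad. For port 2 on quad 0, the window length register
+ * P_REG_WL (defined below) would be addressed as
+ *
+ *	addr = P_0_BASE + P_REG_WL + 0x2000 * 2 = 0x8440C
+ *	P_Q0_L(P_0_BASE + P_REG_WL, 2) = 0x440C	(low 16 bits)
+ *	P_Q0_H(P_0_BASE + P_REG_WL, 2) = 0x8	(upper bits)
+ *
+ * matching the low/high address split used by the sideband messages.
+ */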
+
+/* PHY QUAD register base addresses */
+#define Q_0_BASE 0x94000
+#define Q_1_BASE 0x114000
+
+/* Timestamp memory reset registers */
+#define Q_REG_TS_CTRL 0x618
+#define Q_REG_TS_CTRL_S 0
+#define Q_REG_TS_CTRL_M BIT(0)
+
+/* Timestamp availability status registers */
+#define Q_REG_TX_MEMORY_STATUS_L 0xCF0
+#define Q_REG_TX_MEMORY_STATUS_U 0xCF4
+
+/* Tx FIFO status registers */
+#define Q_REG_FIFO23_STATUS 0xCF8
+#define Q_REG_FIFO01_STATUS 0xCFC
+#define Q_REG_FIFO02_S 0
+#define Q_REG_FIFO02_M ICE_M(0x3FF, 0)
+#define Q_REG_FIFO13_S 10
+#define Q_REG_FIFO13_M ICE_M(0x3FF, 10)
+
+/* Interrupt control Config registers */
+#define Q_REG_TX_MEM_GBL_CFG 0xC08
+#define Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_S 0
+#define Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_M BIT(0)
+#define Q_REG_TX_MEM_GBL_CFG_TX_TYPE_S 1
+#define Q_REG_TX_MEM_GBL_CFG_TX_TYPE_M ICE_M(0xFF, 1)
+#define Q_REG_TX_MEM_GBL_CFG_INTR_THR_S 9
+#define Q_REG_TX_MEM_GBL_CFG_INTR_THR_M ICE_M(0x3F, 9)
+#define Q_REG_TX_MEM_GBL_CFG_INTR_ENA_S 15
+#define Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M BIT(15)
+
+/* Tx Timestamp data registers */
+#define Q_REG_TX_MEMORY_BANK_START 0xA00
+
+/* PHY port register base addresses */
+#define P_0_BASE 0x80000
+#define P_4_BASE 0x106000
+
+/* Timestamp init registers */
+#define P_REG_RX_TIMER_INC_PRE_L 0x46C
+#define P_REG_RX_TIMER_INC_PRE_U 0x470
+#define P_REG_TX_TIMER_INC_PRE_L 0x44C
+#define P_REG_TX_TIMER_INC_PRE_U 0x450
+
+/* Timestamp match and adjust target registers */
+#define P_REG_RX_TIMER_CNT_ADJ_L 0x474
+#define P_REG_RX_TIMER_CNT_ADJ_U 0x478
+#define P_REG_TX_TIMER_CNT_ADJ_L 0x454
+#define P_REG_TX_TIMER_CNT_ADJ_U 0x458
+
+/* Timestamp capture registers */
+#define P_REG_RX_CAPTURE_L 0x4D8
+#define P_REG_RX_CAPTURE_U 0x4DC
+#define P_REG_TX_CAPTURE_L 0x4B4
+#define P_REG_TX_CAPTURE_U 0x4B8
+
+/* Timestamp PHY incval registers */
+#define P_REG_TIMETUS_L 0x410
+#define P_REG_TIMETUS_U 0x414
+
+#define P_REG_40B_LOW_M 0xFF
+#define P_REG_40B_HIGH_S 8
+
+/* PHY window length registers */
+#define P_REG_WL 0x40C
+
+#define PTP_VERNIER_WL 0x111ed
+
+/* PHY start registers */
+#define P_REG_PS 0x408
+#define P_REG_PS_START_S 0
+#define P_REG_PS_START_M BIT(0)
+#define P_REG_PS_BYPASS_MODE_S 1
+#define P_REG_PS_BYPASS_MODE_M BIT(1)
+#define P_REG_PS_ENA_CLK_S 2
+#define P_REG_PS_ENA_CLK_M BIT(2)
+#define P_REG_PS_LOAD_OFFSET_S 3
+#define P_REG_PS_LOAD_OFFSET_M BIT(3)
+#define P_REG_PS_SFT_RESET_S 11
+#define P_REG_PS_SFT_RESET_M BIT(11)
+
+/* PHY offset valid registers */
+#define P_REG_TX_OV_STATUS 0x4D4
+#define P_REG_TX_OV_STATUS_OV_S 0
+#define P_REG_TX_OV_STATUS_OV_M BIT(0)
+#define P_REG_RX_OV_STATUS 0x4F8
+#define P_REG_RX_OV_STATUS_OV_S 0
+#define P_REG_RX_OV_STATUS_OV_M BIT(0)
+
+/* PHY offset ready registers */
+#define P_REG_TX_OR 0x45C
+#define P_REG_RX_OR 0x47C
+
+/* PHY total offset registers */
+#define P_REG_TOTAL_RX_OFFSET_L 0x460
+#define P_REG_TOTAL_RX_OFFSET_U 0x464
+#define P_REG_TOTAL_TX_OFFSET_L 0x440
+#define P_REG_TOTAL_TX_OFFSET_U 0x444
+
+/* Timestamp PAR/PCS registers */
+#define P_REG_UIX66_10G_40G_L 0x480
+#define P_REG_UIX66_10G_40G_U 0x484
+#define P_REG_UIX66_25G_100G_L 0x488
+#define P_REG_UIX66_25G_100G_U 0x48C
+#define P_REG_DESK_PAR_RX_TUS_L 0x490
+#define P_REG_DESK_PAR_RX_TUS_U 0x494
+#define P_REG_DESK_PAR_TX_TUS_L 0x498
+#define P_REG_DESK_PAR_TX_TUS_U 0x49C
+#define P_REG_DESK_PCS_RX_TUS_L 0x4A0
+#define P_REG_DESK_PCS_RX_TUS_U 0x4A4
+#define P_REG_DESK_PCS_TX_TUS_L 0x4A8
+#define P_REG_DESK_PCS_TX_TUS_U 0x4AC
+#define P_REG_PAR_RX_TUS_L 0x420
+#define P_REG_PAR_RX_TUS_U 0x424
+#define P_REG_PAR_TX_TUS_L 0x428
+#define P_REG_PAR_TX_TUS_U 0x42C
+#define P_REG_PCS_RX_TUS_L 0x430
+#define P_REG_PCS_RX_TUS_U 0x434
+#define P_REG_PCS_TX_TUS_L 0x438
+#define P_REG_PCS_TX_TUS_U 0x43C
+#define P_REG_PAR_RX_TIME_L 0x4F0
+#define P_REG_PAR_RX_TIME_U 0x4F4
+#define P_REG_PAR_TX_TIME_L 0x4CC
+#define P_REG_PAR_TX_TIME_U 0x4D0
+#define P_REG_PAR_PCS_RX_OFFSET_L 0x4E8
+#define P_REG_PAR_PCS_RX_OFFSET_U 0x4EC
+#define P_REG_PAR_PCS_TX_OFFSET_L 0x4C4
+#define P_REG_PAR_PCS_TX_OFFSET_U 0x4C8
+#define P_REG_LINK_SPEED 0x4FC
+#define P_REG_LINK_SPEED_SERDES_S 0
+#define P_REG_LINK_SPEED_SERDES_M ICE_M(0x7, 0)
+#define P_REG_LINK_SPEED_FEC_MODE_S 3
+#define P_REG_LINK_SPEED_FEC_MODE_M ICE_M(0x3, 3)
+#define P_REG_LINK_SPEED_FEC_MODE(reg) \
+ (((reg) & P_REG_LINK_SPEED_FEC_MODE_M) >> \
+ P_REG_LINK_SPEED_FEC_MODE_S)
+
+/* PHY timestamp related registers */
+#define P_REG_PMD_ALIGNMENT 0x0FC
+#define P_REG_RX_80_TO_160_CNT 0x6FC
+#define P_REG_RX_80_TO_160_CNT_RXCYC_S 0
+#define P_REG_RX_80_TO_160_CNT_RXCYC_M BIT(0)
+#define P_REG_RX_40_TO_160_CNT 0x8FC
+#define P_REG_RX_40_TO_160_CNT_RXCYC_S 0
+#define P_REG_RX_40_TO_160_CNT_RXCYC_M ICE_M(0x3, 0)
+
+/* Rx FIFO status registers */
+#define P_REG_RX_OV_FS 0x4F8
+#define P_REG_RX_OV_FS_FIFO_STATUS_S 2
+#define P_REG_RX_OV_FS_FIFO_STATUS_M ICE_M(0x3FF, 2)
+
+/* Timestamp command registers */
+#define P_REG_TX_TMR_CMD 0x448
+#define P_REG_RX_TMR_CMD 0x468
+
+/* E810 timesync enable register */
+#define ETH_GLTSYN_ENA(_i) (0x03000348 + ((_i) * 4))
+
+/* E810 shadow init time registers */
+#define ETH_GLTSYN_SHTIME_0(i) (0x03000368 + ((i) * 32))
+#define ETH_GLTSYN_SHTIME_L(i) (0x0300036C + ((i) * 32))
+
+/* E810 shadow time adjust registers */
+#define ETH_GLTSYN_SHADJ_L(_i) (0x03000378 + ((_i) * 32))
+#define ETH_GLTSYN_SHADJ_H(_i) (0x0300037C + ((_i) * 32))
+
+/* E810 timer command register */
+#define ETH_GLTSYN_CMD 0x03000344
+
+/* Source timer incval macros */
+#define INCVAL_HIGH_M 0xFF
+
+/* Timestamp block macros */
+#define TS_VALID BIT(0)
+#define TS_LOW_M 0xFFFFFFFF
+#define TS_HIGH_M 0xFF
+#define TS_HIGH_S 32
+
+#define TS_PHY_LOW_M 0xFF
+#define TS_PHY_HIGH_M 0xFFFFFFFF
+#define TS_PHY_HIGH_S 8
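+
+/* Illustrative sketch (assumed pairing, based on the mask widths): a 40-bit
+ * timestamp is split across a low and a high word. E810 devices keep the
+ * lower 32 bits in the low register and the top 8 bits in the high register:
+ *
+ *	ts = ((u64)(hi & TS_HIGH_M) << TS_HIGH_S) | (lo & TS_LOW_M);
+ *
+ * while E822 PHY timestamps keep only 8 bits in the low register:
+ *
+ *	ts = ((u64)(hi & TS_PHY_HIGH_M) << TS_PHY_HIGH_S) | (lo & TS_PHY_LOW_M);
+ */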
+
+#define BYTES_PER_IDX_ADDR_L_U 8
+#define BYTES_PER_IDX_ADDR_L 4
+
+/* Tx timestamp low latency read definitions */
+#define TS_LL_READ_RETRIES 200
+#define TS_LL_READ_TS_HIGH GENMASK(23, 16)
+#define TS_LL_READ_TS_IDX GENMASK(29, 24)
+#define TS_LL_READ_TS BIT(31)
+
+/* Internal PHY timestamp address */
+#define TS_L(a, idx) ((a) + ((idx) * BYTES_PER_IDX_ADDR_L_U))
+#define TS_H(a, idx) ((a) + ((idx) * BYTES_PER_IDX_ADDR_L_U + \
+ BYTES_PER_IDX_ADDR_L))
+
+/* External PHY timestamp address */
+#define TS_EXT(a, port, idx) ((a) + (0x1000 * (port)) + \
+ ((idx) * BYTES_PER_IDX_ADDR_L_U))
+
+#define LOW_TX_MEMORY_BANK_START 0x03090000
+#define HIGH_TX_MEMORY_BANK_START 0x03090004
+
+/* E810T SMA controller pin control */
+#define ICE_SMA1_DIR_EN_E810T BIT(4)
+#define ICE_SMA1_TX_EN_E810T BIT(5)
+#define ICE_SMA2_UFL2_RX_DIS_E810T BIT(3)
+#define ICE_SMA2_DIR_EN_E810T BIT(6)
+#define ICE_SMA2_TX_EN_E810T BIT(7)
+
+#define ICE_SMA1_MASK_E810T (ICE_SMA1_DIR_EN_E810T | \
+ ICE_SMA1_TX_EN_E810T)
+#define ICE_SMA2_MASK_E810T (ICE_SMA2_UFL2_RX_DIS_E810T | \
+ ICE_SMA2_DIR_EN_E810T | \
+ ICE_SMA2_TX_EN_E810T)
+#define ICE_ALL_SMA_MASK_E810T (ICE_SMA1_MASK_E810T | \
+ ICE_SMA2_MASK_E810T)
+
+#define ICE_SMA_MIN_BIT_E810T 3
+#define ICE_SMA_MAX_BIT_E810T 7
+#define ICE_PCA9575_P1_OFFSET 8
+
+/* E810T PCA9575 IO controller registers */
+#define ICE_PCA9575_P0_IN 0x0
+
+/* E810T PCA9575 IO controller pin control */
+#define ICE_E810T_P0_GNSS_PRSNT_N BIT(4)
+
+#endif /* _ICE_PTP_HW_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c
new file mode 100644
index 000000000..bd31748aa
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_repr.c
@@ -0,0 +1,467 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_eswitch.h"
+#include "ice_devlink.h"
+#include "ice_sriov.h"
+#include "ice_tc_lib.h"
+
+/**
+ * ice_repr_get_sw_port_id - get port ID associated with representor
+ * @repr: pointer to port representor
+ */
+static int ice_repr_get_sw_port_id(struct ice_repr *repr)
+{
+ return repr->vf->pf->hw.port_info->lport;
+}
+
+/**
+ * ice_repr_get_phys_port_name - get phys port name
+ * @netdev: pointer to port representor netdev
+ * @buf: write here port name
+ * @len: max length of buf
+ */
+static int
+ice_repr_get_phys_port_name(struct net_device *netdev, char *buf, size_t len)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_repr *repr = np->repr;
+ int res;
+
+ /* The devlink port is registered and the devlink core takes care of name formatting. */
+ if (repr->vf->devlink_port.devlink)
+ return -EOPNOTSUPP;
+
+ res = snprintf(buf, len, "pf%dvfr%d", ice_repr_get_sw_port_id(repr),
+ repr->vf->vf_id);
+ if (res <= 0)
+ return -EOPNOTSUPP;
+ return 0;
+}
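+
+/* Example (illustrative): for VF 1 behind logical port 0, the format above
+ * produces the name "pf0vfr1".
+ */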
+
+/**
+ * ice_repr_get_stats64 - get VF stats for VFPR use
+ * @netdev: pointer to port representor netdev
+ * @stats: pointer to struct where stats can be stored
+ */
+static void
+ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_eth_stats *eth_stats;
+ struct ice_vsi *vsi;
+
+ if (ice_is_vf_disabled(np->repr->vf))
+ return;
+ vsi = np->repr->src_vsi;
+
+ ice_update_vsi_stats(vsi);
+ eth_stats = &vsi->eth_stats;
+
+ stats->tx_packets = eth_stats->tx_unicast + eth_stats->tx_broadcast +
+ eth_stats->tx_multicast;
+ stats->rx_packets = eth_stats->rx_unicast + eth_stats->rx_broadcast +
+ eth_stats->rx_multicast;
+ stats->tx_bytes = eth_stats->tx_bytes;
+ stats->rx_bytes = eth_stats->rx_bytes;
+ stats->multicast = eth_stats->rx_multicast;
+ stats->tx_errors = eth_stats->tx_errors;
+ stats->tx_dropped = eth_stats->tx_discards;
+ stats->rx_dropped = eth_stats->rx_discards;
+}
+
+/**
+ * ice_netdev_to_repr - Get port representor for given netdevice
+ * @netdev: pointer to port representor netdev
+ */
+struct ice_repr *ice_netdev_to_repr(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ return np->repr;
+}
+
+/**
+ * ice_repr_open - Enable port representor's network interface
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a port representor's network
+ * interface is made active by the system (IFF_UP). Corresponding
+ * VF is notified about link status change.
+ *
+ * Returns 0 on success
+ */
+static int ice_repr_open(struct net_device *netdev)
+{
+ struct ice_repr *repr = ice_netdev_to_repr(netdev);
+ struct ice_vf *vf;
+
+ vf = repr->vf;
+ vf->link_forced = true;
+ vf->link_up = true;
+ ice_vc_notify_vf_link_state(vf);
+
+ netif_carrier_on(netdev);
+ netif_tx_start_all_queues(netdev);
+
+ return 0;
+}
+
+/**
+ * ice_repr_stop - Disable port representor's network interface
+ * @netdev: network interface device structure
+ *
+ * The stop entry point is called when a port representor's network
+ * interface is deactivated by the system. The corresponding
+ * VF is notified about the link status change.
+ *
+ * Returns 0 on success
+ */
+static int ice_repr_stop(struct net_device *netdev)
+{
+ struct ice_repr *repr = ice_netdev_to_repr(netdev);
+ struct ice_vf *vf;
+
+ vf = repr->vf;
+ vf->link_forced = true;
+ vf->link_up = false;
+ ice_vc_notify_vf_link_state(vf);
+
+ netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+
+ return 0;
+}
+
+static struct devlink_port *
+ice_repr_get_devlink_port(struct net_device *netdev)
+{
+ struct ice_repr *repr = ice_netdev_to_repr(netdev);
+
+ return &repr->vf->devlink_port;
+}
+
+/**
+ * ice_repr_sp_stats64 - get slow path stats for port representor
+ * @dev: network interface device structure
+ * @stats: netlink stats structure
+ *
+ * RX/TX stats are swapped here to be consistent with VF stats. On the slow
+ * path, the port representor receives data when the corresponding VF is
+ * sending it (and vice versa), so TX and RX bytes/packets are effectively
+ * swapped on the port representor.
+ */
+static int
+ice_repr_sp_stats64(const struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ int vf_id = np->repr->vf->vf_id;
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+ u64 pkts, bytes;
+
+ tx_ring = np->vsi->tx_rings[vf_id];
+ ice_fetch_u64_stats_per_ring(&tx_ring->syncp, tx_ring->stats,
+ &pkts, &bytes);
+ stats->rx_packets = pkts;
+ stats->rx_bytes = bytes;
+
+ rx_ring = np->vsi->rx_rings[vf_id];
+ ice_fetch_u64_stats_per_ring(&rx_ring->syncp, rx_ring->stats,
+ &pkts, &bytes);
+ stats->tx_packets = pkts;
+ stats->tx_bytes = bytes;
+ stats->tx_dropped = rx_ring->rx_stats.alloc_page_failed +
+ rx_ring->rx_stats.alloc_buf_failed;
+
+ return 0;
+}
+
+static bool
+ice_repr_ndo_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+ return attr_id == IFLA_OFFLOAD_XSTATS_CPU_HIT;
+}
+
+static int
+ice_repr_ndo_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
+{
+ if (attr_id == IFLA_OFFLOAD_XSTATS_CPU_HIT)
+ return ice_repr_sp_stats64(dev, (struct rtnl_link_stats64 *)sp);
+
+ return -EINVAL;
+}
+
+static int
+ice_repr_setup_tc_cls_flower(struct ice_repr *repr,
+ struct flow_cls_offload *flower)
+{
+ switch (flower->command) {
+ case FLOW_CLS_REPLACE:
+ return ice_add_cls_flower(repr->netdev, repr->src_vsi, flower);
+ case FLOW_CLS_DESTROY:
+ return ice_del_cls_flower(repr->src_vsi, flower);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int
+ice_repr_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload *flower = (struct flow_cls_offload *)type_data;
+ struct ice_netdev_priv *np = (struct ice_netdev_priv *)cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return ice_repr_setup_tc_cls_flower(np->repr, flower);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static LIST_HEAD(ice_repr_block_cb_list);
+
+static int
+ice_repr_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return flow_block_cb_setup_simple((struct flow_block_offload *)
+ type_data,
+ &ice_repr_block_cb_list,
+ ice_repr_setup_tc_block_cb,
+ np, np, true);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static const struct net_device_ops ice_repr_netdev_ops = {
+ .ndo_get_phys_port_name = ice_repr_get_phys_port_name,
+ .ndo_get_stats64 = ice_repr_get_stats64,
+ .ndo_open = ice_repr_open,
+ .ndo_stop = ice_repr_stop,
+ .ndo_start_xmit = ice_eswitch_port_start_xmit,
+ .ndo_get_devlink_port = ice_repr_get_devlink_port,
+ .ndo_setup_tc = ice_repr_setup_tc,
+ .ndo_has_offload_stats = ice_repr_ndo_has_offload_stats,
+ .ndo_get_offload_stats = ice_repr_ndo_get_offload_stats,
+};
+
+/**
+ * ice_is_port_repr_netdev - Check if a given netdevice is a port representor netdev
+ * @netdev: pointer to netdev
+ */
+bool ice_is_port_repr_netdev(struct net_device *netdev)
+{
+ return netdev && (netdev->netdev_ops == &ice_repr_netdev_ops);
+}
+
+/**
+ * ice_repr_reg_netdev - register port representor netdev
+ * @netdev: pointer to port representor netdev
+ */
+static int
+ice_repr_reg_netdev(struct net_device *netdev)
+{
+ eth_hw_addr_random(netdev);
+ netdev->netdev_ops = &ice_repr_netdev_ops;
+ ice_set_ethtool_repr_ops(netdev);
+
+ netdev->hw_features |= NETIF_F_HW_TC;
+
+ netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+
+ return register_netdev(netdev);
+}
+
+/**
+ * ice_repr_add - add representor for VF
+ * @vf: pointer to VF structure
+ */
+static int ice_repr_add(struct ice_vf *vf)
+{
+ struct ice_q_vector *q_vector;
+ struct ice_netdev_priv *np;
+ struct ice_repr *repr;
+ struct ice_vsi *vsi;
+ int err;
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi)
+ return -EINVAL;
+
+ repr = kzalloc(sizeof(*repr), GFP_KERNEL);
+ if (!repr)
+ return -ENOMEM;
+
+#ifdef CONFIG_ICE_SWITCHDEV
+ repr->mac_rule = kzalloc(sizeof(*repr->mac_rule), GFP_KERNEL);
+ if (!repr->mac_rule) {
+ err = -ENOMEM;
+ goto err_alloc_rule;
+ }
+#endif
+
+ repr->netdev = alloc_etherdev(sizeof(struct ice_netdev_priv));
+ if (!repr->netdev) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ repr->src_vsi = vsi;
+ repr->vf = vf;
+ vf->repr = repr;
+ np = netdev_priv(repr->netdev);
+ np->repr = repr;
+
+ q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL);
+ if (!q_vector) {
+ err = -ENOMEM;
+ goto err_alloc_q_vector;
+ }
+ repr->q_vector = q_vector;
+
+ err = ice_devlink_create_vf_port(vf);
+ if (err)
+ goto err_devlink;
+
+ repr->netdev->min_mtu = ETH_MIN_MTU;
+ repr->netdev->max_mtu = ICE_MAX_MTU;
+
+ SET_NETDEV_DEV(repr->netdev, ice_pf_to_dev(vf->pf));
+ err = ice_repr_reg_netdev(repr->netdev);
+ if (err)
+ goto err_netdev;
+
+ devlink_port_type_eth_set(&vf->devlink_port, repr->netdev);
+
+ ice_virtchnl_set_repr_ops(vf);
+
+ return 0;
+
+err_netdev:
+ ice_devlink_destroy_vf_port(vf);
+err_devlink:
+ kfree(repr->q_vector);
+ vf->repr->q_vector = NULL;
+err_alloc_q_vector:
+ free_netdev(repr->netdev);
+ repr->netdev = NULL;
+err_alloc:
+#ifdef CONFIG_ICE_SWITCHDEV
+ kfree(repr->mac_rule);
+ repr->mac_rule = NULL;
+err_alloc_rule:
+#endif
+ kfree(repr);
+ vf->repr = NULL;
+ return err;
+}
+
+/**
+ * ice_repr_rem - remove representor from VF
+ * @vf: pointer to VF structure
+ */
+static void ice_repr_rem(struct ice_vf *vf)
+{
+ if (!vf->repr)
+ return;
+
+ kfree(vf->repr->q_vector);
+ vf->repr->q_vector = NULL;
+ unregister_netdev(vf->repr->netdev);
+ ice_devlink_destroy_vf_port(vf);
+ free_netdev(vf->repr->netdev);
+ vf->repr->netdev = NULL;
+#ifdef CONFIG_ICE_SWITCHDEV
+ kfree(vf->repr->mac_rule);
+ vf->repr->mac_rule = NULL;
+#endif
+ kfree(vf->repr);
+ vf->repr = NULL;
+
+ ice_virtchnl_set_dflt_ops(vf);
+}
+
+/**
+ * ice_repr_rem_from_all_vfs - remove port representor for all VFs
+ * @pf: pointer to PF structure
+ */
+void ice_repr_rem_from_all_vfs(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ ice_for_each_vf(pf, bkt, vf)
+ ice_repr_rem(vf);
+}
+
+/**
+ * ice_repr_add_for_all_vfs - add port representor for all VFs
+ * @pf: pointer to PF structure
+ */
+int ice_repr_add_for_all_vfs(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+ int err;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ ice_for_each_vf(pf, bkt, vf) {
+ err = ice_repr_add(vf);
+ if (err)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ ice_repr_rem_from_all_vfs(pf);
+
+ return err;
+}
+
+/**
+ * ice_repr_start_tx_queues - start Tx queues of port representor
+ * @repr: pointer to repr structure
+ */
+void ice_repr_start_tx_queues(struct ice_repr *repr)
+{
+ netif_carrier_on(repr->netdev);
+ netif_tx_start_all_queues(repr->netdev);
+}
+
+/**
+ * ice_repr_stop_tx_queues - stop Tx queues of port representor
+ * @repr: pointer to repr structure
+ */
+void ice_repr_stop_tx_queues(struct ice_repr *repr)
+{
+ netif_carrier_off(repr->netdev);
+ netif_tx_stop_all_queues(repr->netdev);
+}
+
+/**
+ * ice_repr_set_traffic_vsi - set traffic VSI for port representor
+ * @repr: repr on which the VSI will be set
+ * @vsi: pointer to VSI that will be used by port representor to pass traffic
+ */
+void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi)
+{
+ struct ice_netdev_priv *np = netdev_priv(repr->netdev);
+
+ np->vsi = vsi;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h
new file mode 100644
index 000000000..378a45bfa
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_repr.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_REPR_H_
+#define _ICE_REPR_H_
+
+#include <net/dst_metadata.h>
+
+struct ice_repr {
+ struct ice_vsi *src_vsi;
+ struct ice_vf *vf;
+ struct ice_q_vector *q_vector;
+ struct net_device *netdev;
+ struct metadata_dst *dst;
+#ifdef CONFIG_ICE_SWITCHDEV
+ /* info about slow path MAC rule */
+ struct ice_rule_query_data *mac_rule;
+ u8 rule_added;
+#endif
+};
+
+int ice_repr_add_for_all_vfs(struct ice_pf *pf);
+void ice_repr_rem_from_all_vfs(struct ice_pf *pf);
+
+void ice_repr_start_tx_queues(struct ice_repr *repr);
+void ice_repr_stop_tx_queues(struct ice_repr *repr);
+
+void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi);
+
+struct ice_repr *ice_netdev_to_repr(struct net_device *netdev);
+bool ice_is_port_repr_netdev(struct net_device *netdev);
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
new file mode 100644
index 000000000..ead75fe2b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_SBQ_CMD_H_
+#define _ICE_SBQ_CMD_H_
+
+/* This header file defines the Sideband Queue commands, error codes and
+ * descriptor format. It is shared between Firmware and Software.
+ */
+
+/* Sideband Queue command structure and opcodes */
+enum ice_sbq_opc {
+ /* Sideband Queue commands */
+ ice_sbq_opc_neigh_dev_req = 0x0C00,
+ ice_sbq_opc_neigh_dev_ev = 0x0C01
+};
+
+/* Sideband Queue descriptor. Indirect command
+ * and non-posted
+ */
+struct ice_sbq_cmd_desc {
+ __le16 flags;
+ __le16 opcode;
+ __le16 datalen;
+ __le16 cmd_retval;
+
+ /* Opaque message data */
+ __le32 cookie_high;
+ __le32 cookie_low;
+
+ union {
+ __le16 cmd_len;
+ __le16 cmpl_len;
+ } param0;
+
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_sbq_evt_desc {
+ __le16 flags;
+ __le16 opcode;
+ __le16 datalen;
+ __le16 cmd_retval;
+ u8 data[24];
+};
+
+enum ice_sbq_msg_dev {
+ rmn_0 = 0x02,
+ rmn_1 = 0x03,
+ rmn_2 = 0x04,
+ cgu = 0x06
+};
+
+enum ice_sbq_msg_opcode {
+ ice_sbq_msg_rd = 0x00,
+ ice_sbq_msg_wr = 0x01
+};
+
+#define ICE_SBQ_MSG_FLAGS 0x40
+#define ICE_SBQ_MSG_SBE_FBE 0x0F
+
+struct ice_sbq_msg_req {
+ u8 dest_dev;
+ u8 src_dev;
+ u8 opcode;
+ u8 flags;
+ u8 sbe_fbe;
+ u8 func_id;
+ __le16 msg_addr_low;
+ __le32 msg_addr_high;
+ __le32 data;
+};
+
+struct ice_sbq_msg_cmpl {
+ u8 dest_dev;
+ u8 src_dev;
+ u8 opcode;
+ u8 flags;
+ __le32 data;
+};
+
+/* Internal struct */
+struct ice_sbq_msg_input {
+ u8 dest_dev;
+ u8 opcode;
+ u16 msg_addr_low;
+ u32 msg_addr_high;
+ u32 data;
+};
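+
+/* Illustrative sketch (not part of this header): the driver's sideband
+ * helpers fill ice_sbq_msg_input roughly as follows to read a register of a
+ * remote device such as the CGU; reg_addr here is a hypothetical placeholder:
+ *
+ *	struct ice_sbq_msg_input msg = {
+ *		.dest_dev = cgu,
+ *		.opcode = ice_sbq_msg_rd,
+ *		.msg_addr_low = reg_addr & 0xFFFF,
+ *		.msg_addr_high = reg_addr >> 16,
+ *	};
+ *
+ * The read result is returned in msg.data once the queue transaction
+ * completes.
+ */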
+#endif /* _ICE_SBQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
new file mode 100644
index 000000000..2c62c1763
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -0,0 +1,4263 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_sched.h"
+
+/**
+ * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
+ * @pi: port information structure
+ * @info: Scheduler element information from firmware
+ *
+ * This function inserts the root node of the scheduling tree topology
+ * into the SW DB.
+ */
+static int
+ice_sched_add_root_node(struct ice_port_info *pi,
+ struct ice_aqc_txsched_elem_data *info)
+{
+ struct ice_sched_node *root;
+ struct ice_hw *hw;
+
+ if (!pi)
+ return -EINVAL;
+
+ hw = pi->hw;
+
+ root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
+ if (!root)
+ return -ENOMEM;
+
+ /* coverity[suspicious_sizeof] */
+ root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0],
+ sizeof(*root), GFP_KERNEL);
+ if (!root->children) {
+ devm_kfree(ice_hw_to_dev(hw), root);
+ return -ENOMEM;
+ }
+
+ memcpy(&root->info, info, sizeof(*info));
+ pi->root = root;
+ return 0;
+}
+
+/**
+ * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
+ * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
+ * @teid: node TEID to search
+ *
+ * This function searches for a node matching the TEID in the scheduling tree
+ * from the SW DB. The search is recursive and stops once it reaches the
+ * max supported layer.
+ *
+ * This function needs to be called when holding the port_info->sched_lock
+ */
+struct ice_sched_node *
+ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
+{
+ u16 i;
+
+ /* The TEID is same as that of the start_node */
+ if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
+ return start_node;
+
+ /* The node has no children or is at the max layer */
+ if (!start_node->num_children ||
+ start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
+ start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
+ return NULL;
+
+ /* Check if TEID matches to any of the children nodes */
+ for (i = 0; i < start_node->num_children; i++)
+ if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
+ return start_node->children[i];
+
+ /* Search within each child's sub-tree */
+ for (i = 0; i < start_node->num_children; i++) {
+ struct ice_sched_node *tmp;
+
+ tmp = ice_sched_find_node_by_teid(start_node->children[i],
+ teid);
+ if (tmp)
+ return tmp;
+ }
+
+ return NULL;
+}
+
+/**
+ * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
+ * @hw: pointer to the HW struct
+ * @cmd_opc: cmd opcode
+ * @elems_req: number of elements to request
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_resp: returns total number of elements response
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function sends a scheduling elements cmd (cmd_opc)
+ */
+static int
+ice_aqc_send_sched_elem_cmd(struct ice_hw *hw, enum ice_adminq_opc cmd_opc,
+ u16 elems_req, void *buf, u16 buf_size,
+ u16 *elems_resp, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_sched_elem_cmd *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ cmd = &desc.params.sched_elem_cmd;
+ ice_fill_dflt_direct_cmd_desc(&desc, cmd_opc);
+ cmd->num_elem_req = cpu_to_le16(elems_req);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status && elems_resp)
+ *elems_resp = le16_to_cpu(cmd->num_elem_resp);
+
+ return status;
+}
+
+/**
+ * ice_aq_query_sched_elems - query scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to query
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements returned
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query scheduling elements (0x0404)
+ */
+int
+ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
+ struct ice_aqc_txsched_elem_data *buf, u16 buf_size,
+ u16 *elems_ret, struct ice_sq_cd *cd)
+{
+ return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_get_sched_elems,
+ elems_req, (void *)buf, buf_size,
+ elems_ret, cd);
+}
+
+/**
+ * ice_sched_add_node - Insert the Tx scheduler node in SW DB
+ * @pi: port information structure
+ * @layer: Scheduler layer of the node
+ * @info: Scheduler element information from firmware
+ *
+ * This function inserts a scheduler node into the SW DB.
+ */
+int
+ice_sched_add_node(struct ice_port_info *pi, u8 layer,
+ struct ice_aqc_txsched_elem_data *info)
+{
+ struct ice_aqc_txsched_elem_data elem;
+ struct ice_sched_node *parent;
+ struct ice_sched_node *node;
+ struct ice_hw *hw;
+ int status;
+
+ if (!pi)
+ return -EINVAL;
+
+ hw = pi->hw;
+
+ /* A valid parent node should be there */
+ parent = ice_sched_find_node_by_teid(pi->root,
+ le32_to_cpu(info->parent_teid));
+ if (!parent) {
+ ice_debug(hw, ICE_DBG_SCHED, "Parent Node not found for parent_teid=0x%x\n",
+ le32_to_cpu(info->parent_teid));
+ return -EINVAL;
+ }
+
+ /* query the current node information from FW before adding it
+ * to the SW DB
+ */
+ status = ice_sched_query_elem(hw, le32_to_cpu(info->node_teid), &elem);
+ if (status)
+ return status;
+
+ node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+ if (hw->max_children[layer]) {
+ /* coverity[suspicious_sizeof] */
+ node->children = devm_kcalloc(ice_hw_to_dev(hw),
+ hw->max_children[layer],
+ sizeof(*node), GFP_KERNEL);
+ if (!node->children) {
+ devm_kfree(ice_hw_to_dev(hw), node);
+ return -ENOMEM;
+ }
+ }
+
+ node->in_use = true;
+ node->parent = parent;
+ node->tx_sched_layer = layer;
+ parent->children[parent->num_children++] = node;
+ node->info = elem;
+ return 0;
+}
+
+/**
+ * ice_aq_delete_sched_elems - delete scheduler elements
+ * @hw: pointer to the HW struct
+ * @grps_req: number of groups to delete
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_del: returns total number of elements deleted
+ * @cd: pointer to command details structure or NULL
+ *
+ * Delete scheduling elements (0x040F)
+ */
+static int
+ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
+ struct ice_aqc_delete_elem *buf, u16 buf_size,
+ u16 *grps_del, struct ice_sq_cd *cd)
+{
+ return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_delete_sched_elems,
+ grps_req, (void *)buf, buf_size,
+ grps_del, cd);
+}
+
+/**
+ * ice_sched_remove_elems - remove nodes from HW
+ * @hw: pointer to the HW struct
+ * @parent: pointer to the parent node
+ * @num_nodes: number of nodes
+ * @node_teids: array of node teids to be deleted
+ *
+ * This function removes nodes from the HW
+ */
+static int
+ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
+ u16 num_nodes, u32 *node_teids)
+{
+ struct ice_aqc_delete_elem *buf;
+ u16 i, num_groups_removed = 0;
+ u16 buf_size;
+ int status;
+
+ buf_size = struct_size(buf, teid, num_nodes);
+ buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->hdr.parent_teid = parent->info.node_teid;
+ buf->hdr.num_elems = cpu_to_le16(num_nodes);
+ for (i = 0; i < num_nodes; i++)
+ buf->teid[i] = cpu_to_le32(node_teids[i]);
+
+ status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
+ &num_groups_removed, NULL);
+ if (status || num_groups_removed != 1)
+ ice_debug(hw, ICE_DBG_SCHED, "remove node failed FW error %d\n",
+ hw->adminq.sq_last_status);
+
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return status;
+}
+
+/**
+ * ice_sched_get_first_node - get the first node of the given layer
+ * @pi: port information structure
+ * @parent: pointer to the base node of the subtree
+ * @layer: layer number
+ *
+ * This function retrieves the first node of the given layer from the subtree
+ */
+static struct ice_sched_node *
+ice_sched_get_first_node(struct ice_port_info *pi,
+ struct ice_sched_node *parent, u8 layer)
+{
+ return pi->sib_head[parent->tc_num][layer];
+}
+
+/**
+ * ice_sched_get_tc_node - get pointer to TC node
+ * @pi: port information structure
+ * @tc: TC number
+ *
+ * This function returns the TC node pointer
+ */
+struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
+{
+ u8 i;
+
+ if (!pi || !pi->root)
+ return NULL;
+ for (i = 0; i < pi->root->num_children; i++)
+ if (pi->root->children[i]->tc_num == tc)
+ return pi->root->children[i];
+ return NULL;
+}
+
+/**
+ * ice_free_sched_node - Free a Tx scheduler node from SW DB
+ * @pi: port information structure
+ * @node: pointer to the ice_sched_node struct
+ *
+ * This function frees up a node from SW DB as well as from HW
+ *
+ * This function needs to be called with the port_info->sched_lock held
+ */
+void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
+{
+ struct ice_sched_node *parent;
+ struct ice_hw *hw = pi->hw;
+ u8 i, j;
+
+ /* Free the children before freeing up the parent node
+ * The parent array is updated below and that shifts the nodes
+ * in the array. So always pick the first child if num children > 0
+ */
+ while (node->num_children)
+ ice_free_sched_node(pi, node->children[0]);
+
+ /* Leaf, TC and root nodes can't be deleted by SW */
+ if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
+ node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
+ node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
+ node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
+ u32 teid = le32_to_cpu(node->info.node_teid);
+
+ ice_sched_remove_elems(hw, node->parent, 1, &teid);
+ }
+ parent = node->parent;
+ /* root has no parent */
+ if (parent) {
+ struct ice_sched_node *p;
+
+ /* update the parent */
+ for (i = 0; i < parent->num_children; i++)
+ if (parent->children[i] == node) {
+ for (j = i + 1; j < parent->num_children; j++)
+ parent->children[j - 1] =
+ parent->children[j];
+ parent->num_children--;
+ break;
+ }
+
+ p = ice_sched_get_first_node(pi, node, node->tx_sched_layer);
+ while (p) {
+ if (p->sibling == node) {
+ p->sibling = node->sibling;
+ break;
+ }
+ p = p->sibling;
+ }
+
+ /* update the sibling head if head is getting removed */
+ if (pi->sib_head[node->tc_num][node->tx_sched_layer] == node)
+ pi->sib_head[node->tc_num][node->tx_sched_layer] =
+ node->sibling;
+ }
+
+ /* leaf nodes have no children */
+ if (node->children)
+ devm_kfree(ice_hw_to_dev(hw), node->children);
+ devm_kfree(ice_hw_to_dev(hw), node);
+}
+
+/**
+ * ice_aq_get_dflt_topo - gets default scheduler topology
+ * @hw: pointer to the HW struct
+ * @lport: logical port number
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_branches: returns total number of queue-to-port branches
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get default scheduler topology (0x400)
+ */
+static int
+ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
+ struct ice_aqc_get_topo_elem *buf, u16 buf_size,
+ u8 *num_branches, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_topo *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ cmd = &desc.params.get_topo;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
+ cmd->port_num = lport;
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status && num_branches)
+ *num_branches = cmd->num_branches;
+
+ return status;
+}
+
+/**
+ * ice_aq_add_sched_elems - adds scheduling elements
+ * @hw: pointer to the HW struct
+ * @grps_req: the number of groups that are requested to be added
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_added: returns total number of groups added
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add scheduling elements (0x0401)
+ */
+static int
+ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
+ struct ice_aqc_add_elem *buf, u16 buf_size,
+ u16 *grps_added, struct ice_sq_cd *cd)
+{
+ return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_add_sched_elems,
+ grps_req, (void *)buf, buf_size,
+ grps_added, cd);
+}
+
+/**
+ * ice_aq_cfg_sched_elems - configures scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to configure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_cfgd: returns total number of elements configured
+ * @cd: pointer to command details structure or NULL
+ *
+ * Configure scheduling elements (0x0403)
+ */
+static int
+ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req,
+ struct ice_aqc_txsched_elem_data *buf, u16 buf_size,
+ u16 *elems_cfgd, struct ice_sq_cd *cd)
+{
+ return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_cfg_sched_elems,
+ elems_req, (void *)buf, buf_size,
+ elems_cfgd, cd);
+}
+
+/**
+ * ice_aq_move_sched_elems - move scheduler elements
+ * @hw: pointer to the HW struct
+ * @grps_req: number of groups to move
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_movd: returns total number of groups moved
+ * @cd: pointer to command details structure or NULL
+ *
+ * Move scheduling elements (0x0408)
+ */
+static int
+ice_aq_move_sched_elems(struct ice_hw *hw, u16 grps_req,
+ struct ice_aqc_move_elem *buf, u16 buf_size,
+ u16 *grps_movd, struct ice_sq_cd *cd)
+{
+ return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_move_sched_elems,
+ grps_req, (void *)buf, buf_size,
+ grps_movd, cd);
+}
+
+/**
+ * ice_aq_suspend_sched_elems - suspend scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to suspend
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements suspended
+ * @cd: pointer to command details structure or NULL
+ *
+ * Suspend scheduling elements (0x0409)
+ */
+static int
+ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req, __le32 *buf,
+ u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
+{
+ return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_suspend_sched_elems,
+ elems_req, (void *)buf, buf_size,
+ elems_ret, cd);
+}
+
+/**
+ * ice_aq_resume_sched_elems - resume scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to resume
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements resumed
+ * @cd: pointer to command details structure or NULL
+ *
+ * Resume scheduling elements (0x040A)
+ */
+static int
+ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req, __le32 *buf,
+ u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
+{
+ return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_resume_sched_elems,
+ elems_req, (void *)buf, buf_size,
+ elems_ret, cd);
+}
+
+/**
+ * ice_aq_query_sched_res - query scheduler resource
+ * @hw: pointer to the HW struct
+ * @buf_size: buffer size in bytes
+ * @buf: pointer to buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query scheduler resource allocation (0x0412)
+ */
+static int
+ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
+ struct ice_aqc_query_txsched_res_resp *buf,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
+ return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_sched_suspend_resume_elems - suspend or resume HW nodes
+ * @hw: pointer to the HW struct
+ * @num_nodes: number of nodes
+ * @node_teids: array of node teids to be suspended or resumed
+ * @suspend: true means suspend / false means resume
+ *
+ * This function suspends or resumes HW nodes
+ */
+static int
+ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
+ bool suspend)
+{
+ u16 i, buf_size, num_elem_ret = 0;
+ __le32 *buf;
+ int status;
+
+ buf_size = sizeof(*buf) * num_nodes;
+ buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ for (i = 0; i < num_nodes; i++)
+ buf[i] = cpu_to_le32(node_teids[i]);
+
+ if (suspend)
+ status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
+ buf_size, &num_elem_ret,
+ NULL);
+ else
+ status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
+ buf_size, &num_elem_ret,
+ NULL);
+ if (status || num_elem_ret != num_nodes)
+ ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
+
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return status;
+}
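+
+/* Illustrative caller sketch (not part of the driver): suspending one node
+ * by TEID, where "node" stands for any struct ice_sched_node in the tree:
+ *
+ *	u32 teid = le32_to_cpu(node->info.node_teid);
+ *	int err = ice_sched_suspend_resume_elems(hw, 1, &teid, true);
+ *
+ * ice_sched_cfg_vsi() below uses exactly this pattern to suspend or resume
+ * a VSI node.
+ */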
+
+/**
+ * ice_alloc_lan_q_ctx - allocate LAN queue contexts for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @tc: TC number
+ * @new_numqs: number of queues
+ */
+static int
+ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
+{
+ struct ice_vsi_ctx *vsi_ctx;
+ struct ice_q_ctx *q_ctx;
+
+ vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+ if (!vsi_ctx)
+ return -EINVAL;
+ /* allocate LAN queue contexts */
+ if (!vsi_ctx->lan_q_ctx[tc]) {
+ vsi_ctx->lan_q_ctx[tc] = devm_kcalloc(ice_hw_to_dev(hw),
+ new_numqs,
+ sizeof(*q_ctx),
+ GFP_KERNEL);
+ if (!vsi_ctx->lan_q_ctx[tc])
+ return -ENOMEM;
+ vsi_ctx->num_lan_q_entries[tc] = new_numqs;
+ return 0;
+ }
+	/* the number of queues has increased; grow the queue contexts */
+ if (new_numqs > vsi_ctx->num_lan_q_entries[tc]) {
+ u16 prev_num = vsi_ctx->num_lan_q_entries[tc];
+
+ q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs,
+ sizeof(*q_ctx), GFP_KERNEL);
+ if (!q_ctx)
+ return -ENOMEM;
+ memcpy(q_ctx, vsi_ctx->lan_q_ctx[tc],
+ prev_num * sizeof(*q_ctx));
+ devm_kfree(ice_hw_to_dev(hw), vsi_ctx->lan_q_ctx[tc]);
+ vsi_ctx->lan_q_ctx[tc] = q_ctx;
+ vsi_ctx->num_lan_q_entries[tc] = new_numqs;
+ }
+ return 0;
+}
+
+/**
+ * ice_alloc_rdma_q_ctx - allocate RDMA queue contexts for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @tc: TC number
+ * @new_numqs: number of queues
+ */
+static int
+ice_alloc_rdma_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
+{
+ struct ice_vsi_ctx *vsi_ctx;
+ struct ice_q_ctx *q_ctx;
+
+ vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+ if (!vsi_ctx)
+ return -EINVAL;
+ /* allocate RDMA queue contexts */
+ if (!vsi_ctx->rdma_q_ctx[tc]) {
+ vsi_ctx->rdma_q_ctx[tc] = devm_kcalloc(ice_hw_to_dev(hw),
+ new_numqs,
+ sizeof(*q_ctx),
+ GFP_KERNEL);
+ if (!vsi_ctx->rdma_q_ctx[tc])
+ return -ENOMEM;
+ vsi_ctx->num_rdma_q_entries[tc] = new_numqs;
+ return 0;
+ }
+	/* the number of queues has increased; grow the queue contexts */
+ if (new_numqs > vsi_ctx->num_rdma_q_entries[tc]) {
+ u16 prev_num = vsi_ctx->num_rdma_q_entries[tc];
+
+ q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs,
+ sizeof(*q_ctx), GFP_KERNEL);
+ if (!q_ctx)
+ return -ENOMEM;
+ memcpy(q_ctx, vsi_ctx->rdma_q_ctx[tc],
+ prev_num * sizeof(*q_ctx));
+ devm_kfree(ice_hw_to_dev(hw), vsi_ctx->rdma_q_ctx[tc]);
+ vsi_ctx->rdma_q_ctx[tc] = q_ctx;
+ vsi_ctx->num_rdma_q_entries[tc] = new_numqs;
+ }
+ return 0;
+}
+
+/**
+ * ice_aq_rl_profile - performs a rate limiting task
+ * @hw: pointer to the HW struct
+ * @opcode: opcode for add, query, or remove profile(s)
+ * @num_profiles: the number of profiles
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_processed: number of processed add or remove profile(s) to return
+ * @cd: pointer to command details structure
+ *
+ * RL profile function to add, query, or remove profile(s)
+ */
+static int
+ice_aq_rl_profile(struct ice_hw *hw, enum ice_adminq_opc opcode,
+ u16 num_profiles, struct ice_aqc_rl_profile_elem *buf,
+ u16 buf_size, u16 *num_processed, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_rl_profile *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ cmd = &desc.params.rl_profile;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, opcode);
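+	/* mark the attached buffer as command data for the FW to read */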
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ cmd->num_profiles = cpu_to_le16(num_profiles);
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status && num_processed)
+ *num_processed = le16_to_cpu(cmd->num_processed);
+ return status;
+}
+
+/**
+ * ice_aq_add_rl_profile - adds rate limiting profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to be added
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_added: total number of profiles added to return
+ * @cd: pointer to command details structure
+ *
+ * Add RL profile (0x0410)
+ */
+static int
+ice_aq_add_rl_profile(struct ice_hw *hw, u16 num_profiles,
+ struct ice_aqc_rl_profile_elem *buf, u16 buf_size,
+ u16 *num_profiles_added, struct ice_sq_cd *cd)
+{
+ return ice_aq_rl_profile(hw, ice_aqc_opc_add_rl_profiles, num_profiles,
+ buf, buf_size, num_profiles_added, cd);
+}
+
+/**
+ * ice_aq_remove_rl_profile - removes RL profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to remove
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_removed: total number of profiles removed to return
+ * @cd: pointer to command details structure or NULL
+ *
+ * Remove RL profile (0x0415)
+ */
+static int
+ice_aq_remove_rl_profile(struct ice_hw *hw, u16 num_profiles,
+ struct ice_aqc_rl_profile_elem *buf, u16 buf_size,
+ u16 *num_profiles_removed, struct ice_sq_cd *cd)
+{
+ return ice_aq_rl_profile(hw, ice_aqc_opc_remove_rl_profiles,
+ num_profiles, buf, buf_size,
+ num_profiles_removed, cd);
+}
+
+/**
+ * ice_sched_del_rl_profile - remove RL profile
+ * @hw: pointer to the HW struct
+ * @rl_info: rate limit profile information
+ *
+ * If the profile ID is no longer referenced, this function removes the
+ * profile and its associated parameters from the HW DB as well as locally.
+ * The caller must hold the scheduler lock.
+ */
+static int
+ice_sched_del_rl_profile(struct ice_hw *hw,
+ struct ice_aqc_rl_profile_info *rl_info)
+{
+ struct ice_aqc_rl_profile_elem *buf;
+ u16 num_profiles_removed;
+ u16 num_profiles = 1;
+ int status;
+
+ if (rl_info->prof_id_ref != 0)
+ return -EBUSY;
+
+ /* Safe to remove profile ID */
+ buf = &rl_info->profile;
+ status = ice_aq_remove_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+ &num_profiles_removed, NULL);
+ if (status || num_profiles_removed != num_profiles)
+ return -EIO;
+
+ /* Delete stale entry now */
+ list_del(&rl_info->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), rl_info);
+ return status;
+}
+
+/**
+ * ice_sched_clear_rl_prof - clears RL prof entries
+ * @pi: port information structure
+ *
+ * This function removes all RL profiles from HW as well as from the SW DB.
+ */
+static void ice_sched_clear_rl_prof(struct ice_port_info *pi)
+{
+ u16 ln;
+
+ for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+ struct ice_aqc_rl_profile_info *rl_prof_elem;
+ struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+ list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+ &pi->rl_prof_list[ln], list_entry) {
+ struct ice_hw *hw = pi->hw;
+ int status;
+
+ rl_prof_elem->prof_id_ref = 0;
+ status = ice_sched_del_rl_profile(hw, rl_prof_elem);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SCHED, "Remove rl profile failed\n");
+				/* on error, still free the local entry */
+ list_del(&rl_prof_elem->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+ }
+ }
+ }
+}
+
+/**
+ * ice_sched_clear_agg - clears the aggregator related information
+ * @hw: pointer to the hardware structure
+ *
+ * This function removes the aggregator list and frees up the previously
+ * allocated aggregator-related memory.
+ */
+void ice_sched_clear_agg(struct ice_hw *hw)
+{
+ struct ice_sched_agg_info *agg_info;
+ struct ice_sched_agg_info *atmp;
+
+ list_for_each_entry_safe(agg_info, atmp, &hw->agg_list, list_entry) {
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_vsi_info *vtmp;
+
+ list_for_each_entry_safe(agg_vsi_info, vtmp,
+ &agg_info->agg_vsi_list, list_entry) {
+ list_del(&agg_vsi_info->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
+ }
+ list_del(&agg_info->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), agg_info);
+ }
+}
+
+/**
+ * ice_sched_clear_tx_topo - clears the scheduler tree nodes
+ * @pi: port information structure
+ *
+ * This function removes all the nodes from HW as well as from SW DB.
+ */
+static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
+{
+ if (!pi)
+ return;
+ /* remove RL profiles related lists */
+ ice_sched_clear_rl_prof(pi);
+ if (pi->root) {
+ ice_free_sched_node(pi, pi->root);
+ pi->root = NULL;
+ }
+}
+
+/**
+ * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
+ * @pi: port information structure
+ *
+ * Cleanup scheduling elements from SW DB
+ */
+void ice_sched_clear_port(struct ice_port_info *pi)
+{
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return;
+
+ pi->port_state = ICE_SCHED_PORT_STATE_INIT;
+ mutex_lock(&pi->sched_lock);
+ ice_sched_clear_tx_topo(pi);
+ mutex_unlock(&pi->sched_lock);
+ mutex_destroy(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
+ * @hw: pointer to the HW struct
+ *
+ * Cleanup scheduling elements from SW DB for all the ports
+ */
+void ice_sched_cleanup_all(struct ice_hw *hw)
+{
+ if (!hw)
+ return;
+
+ if (hw->layer_info) {
+ devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
+ hw->layer_info = NULL;
+ }
+
+ ice_sched_clear_port(hw->port_info);
+
+ hw->num_tx_sched_layers = 0;
+ hw->num_tx_sched_phys_layers = 0;
+ hw->flattened_layers = 0;
+ hw->max_cgds = 0;
+}
+
+/**
+ * ice_sched_add_elems - add nodes to HW and SW DB
+ * @pi: port information structure
+ * @tc_node: pointer to the branch node
+ * @parent: pointer to the parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes
+ * @num_nodes_added: pointer to num nodes added
+ * @first_node_teid: if new nodes are added then return the TEID of first node
+ *
+ * This function adds nodes to HW as well as to the SW DB for a given layer
+ */
+static int
+ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+ struct ice_sched_node *parent, u8 layer, u16 num_nodes,
+ u16 *num_nodes_added, u32 *first_node_teid)
+{
+ struct ice_sched_node *prev, *new_node;
+ struct ice_aqc_add_elem *buf;
+ u16 i, num_groups_added = 0;
+ struct ice_hw *hw = pi->hw;
+ size_t buf_size;
+ int status = 0;
+ u32 teid;
+
+ buf_size = struct_size(buf, generic, num_nodes);
+ buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->hdr.parent_teid = parent->info.node_teid;
+ buf->hdr.num_elems = cpu_to_le16(num_nodes);
+ for (i = 0; i < num_nodes; i++) {
+ buf->generic[i].parent_teid = parent->info.node_teid;
+ buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
+ buf->generic[i].data.valid_sections =
+ ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+ ICE_AQC_ELEM_VALID_EIR;
+ buf->generic[i].data.generic = 0;
+ buf->generic[i].data.cir_bw.bw_profile_idx =
+ cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+ buf->generic[i].data.cir_bw.bw_alloc =
+ cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+ buf->generic[i].data.eir_bw.bw_profile_idx =
+ cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+ buf->generic[i].data.eir_bw.bw_alloc =
+ cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+ }
+
+ status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
+ &num_groups_added, NULL);
+ if (status || num_groups_added != 1) {
+ ice_debug(hw, ICE_DBG_SCHED, "add node failed FW Error %d\n",
+ hw->adminq.sq_last_status);
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return -EIO;
+ }
+
+ *num_nodes_added = num_nodes;
+ /* add nodes to the SW DB */
+ for (i = 0; i < num_nodes; i++) {
+ status = ice_sched_add_node(pi, layer, &buf->generic[i]);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SCHED, "add nodes in SW DB failed status =%d\n",
+ status);
+ break;
+ }
+
+ teid = le32_to_cpu(buf->generic[i].node_teid);
+ new_node = ice_sched_find_node_by_teid(parent, teid);
+ if (!new_node) {
+ ice_debug(hw, ICE_DBG_SCHED, "Node is missing for teid =%d\n", teid);
+ break;
+ }
+
+ new_node->sibling = NULL;
+ new_node->tc_num = tc_node->tc_num;
+
+ /* add it to previous node sibling pointer */
+ /* Note: siblings are not linked across branches */
+ prev = ice_sched_get_first_node(pi, tc_node, layer);
+ if (prev && prev != new_node) {
+ while (prev->sibling)
+ prev = prev->sibling;
+ prev->sibling = new_node;
+ }
+
+ /* initialize the sibling head */
+ if (!pi->sib_head[tc_node->tc_num][layer])
+ pi->sib_head[tc_node->tc_num][layer] = new_node;
+
+ if (i == 0)
+ *first_node_teid = teid;
+ }
+
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return status;
+}
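+
+/* Buffer sizing sketch for the call above (illustrative): ice_aqc_add_elem
+ * ends in a flexible array, so for num_nodes == 2,
+ *
+ *	struct_size(buf, generic, 2)
+ *
+ * evaluates to sizeof(*buf) + 2 * sizeof(buf->generic[0]), saturating
+ * rather than wrapping on overflow, which is why it is preferred over an
+ * open-coded multiplication.
+ */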
+
+/**
+ * ice_sched_add_nodes_to_hw_layer - Add nodes to HW layer
+ * @pi: port information structure
+ * @tc_node: pointer to TC node
+ * @parent: pointer to parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes to be added
+ * @first_node_teid: pointer to the first node TEID
+ * @num_nodes_added: pointer to number of nodes added
+ *
+ * Add nodes into specific HW layer.
+ */
+static int
+ice_sched_add_nodes_to_hw_layer(struct ice_port_info *pi,
+ struct ice_sched_node *tc_node,
+ struct ice_sched_node *parent, u8 layer,
+ u16 num_nodes, u32 *first_node_teid,
+ u16 *num_nodes_added)
+{
+ u16 max_child_nodes;
+
+ *num_nodes_added = 0;
+
+ if (!num_nodes)
+ return 0;
+
+ if (!parent || layer < pi->hw->sw_entry_point_layer)
+ return -EINVAL;
+
+ /* max children per node per layer */
+ max_child_nodes = pi->hw->max_children[parent->tx_sched_layer];
+
+ /* current number of children + required nodes exceed max children */
+ if ((parent->num_children + num_nodes) > max_child_nodes) {
+ /* Fail if the parent is a TC node */
+ if (parent == tc_node)
+ return -EIO;
+ return -ENOSPC;
+ }
+
+ return ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
+ num_nodes_added, first_node_teid);
+}
+
+/**
+ * ice_sched_add_nodes_to_layer - Add nodes to a given layer
+ * @pi: port information structure
+ * @tc_node: pointer to TC node
+ * @parent: pointer to parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes to be added
+ * @first_node_teid: pointer to the first node TEID
+ * @num_nodes_added: pointer to number of nodes added
+ *
+ * This function adds nodes to a given layer.
+ */
+static int
+ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
+ struct ice_sched_node *tc_node,
+ struct ice_sched_node *parent, u8 layer,
+ u16 num_nodes, u32 *first_node_teid,
+ u16 *num_nodes_added)
+{
+ u32 *first_teid_ptr = first_node_teid;
+ u16 new_num_nodes = num_nodes;
+ int status = 0;
+
+ *num_nodes_added = 0;
+ while (*num_nodes_added < num_nodes) {
+ u16 max_child_nodes, num_added = 0;
+ /* cppcheck-suppress unusedVariable */
+ u32 temp;
+
+ status = ice_sched_add_nodes_to_hw_layer(pi, tc_node, parent,
+ layer, new_num_nodes,
+ first_teid_ptr,
+ &num_added);
+ if (!status)
+ *num_nodes_added += num_added;
+ /* added more nodes than requested ? */
+ if (*num_nodes_added > num_nodes) {
+ ice_debug(pi->hw, ICE_DBG_SCHED, "added extra nodes %d %d\n", num_nodes,
+ *num_nodes_added);
+ status = -EIO;
+ break;
+ }
+ /* break if all the nodes are added successfully */
+ if (!status && (*num_nodes_added == num_nodes))
+ break;
+ /* break if the error is not max limit */
+ if (status && status != -ENOSPC)
+ break;
+ /* Exceeded the max children */
+ max_child_nodes = pi->hw->max_children[parent->tx_sched_layer];
+ /* utilize all the spaces if the parent is not full */
+ if (parent->num_children < max_child_nodes) {
+ new_num_nodes = max_child_nodes - parent->num_children;
+ } else {
+ /* This parent is full, try the next sibling */
+ parent = parent->sibling;
+			/* Don't modify the first node TEID memory if the
+			 * first node was added already in the above call.
+			 * Instead send some temp memory for all other
+			 * iterations of this loop.
+			 */
+ if (num_added)
+ first_teid_ptr = &temp;
+
+ new_num_nodes = num_nodes - *num_nodes_added;
+ }
+ }
+ return status;
+}
+
+/**
+ * ice_sched_get_qgrp_layer - get the current queue group layer number
+ * @hw: pointer to the HW struct
+ *
+ * This function returns the current queue group layer number
+ */
+static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
+{
+	/* the qgroup layer sits just above the leaves: layer "total - 1"
+	 * when counting from 1, i.e. index "total - 2" with 0-based layers
+	 */
+ return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
+}
+
+/**
+ * ice_sched_get_vsi_layer - get the current VSI layer number
+ * @hw: pointer to the HW struct
+ *
+ * This function returns the current VSI layer number
+ */
+static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
+{
+ /* Num Layers VSI layer
+ * 9 6
+ * 7 4
+ * 5 or less sw_entry_point_layer
+ */
+ /* calculate the VSI layer based on number of layers. */
+ if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
+ u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
+
+ if (layer > hw->sw_entry_point_layer)
+ return layer;
+ }
+ return hw->sw_entry_point_layer;
+}
+
+/**
+ * ice_sched_get_agg_layer - get the current aggregator layer number
+ * @hw: pointer to the HW struct
+ *
+ * This function returns the current aggregator layer number
+ */
+static u8 ice_sched_get_agg_layer(struct ice_hw *hw)
+{
+ /* Num Layers aggregator layer
+ * 9 4
+ * 7 or less sw_entry_point_layer
+ */
+ /* calculate the aggregator layer based on number of layers. */
+ if (hw->num_tx_sched_layers > ICE_AGG_LAYER_OFFSET + 1) {
+ u8 layer = hw->num_tx_sched_layers - ICE_AGG_LAYER_OFFSET;
+
+ if (layer > hw->sw_entry_point_layer)
+ return layer;
+ }
+ return hw->sw_entry_point_layer;
+}
+
+/**
+ * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
+ * @pi: port information structure
+ *
+ * This function removes the leaf node that was created by the FW
+ * during initialization
+ */
+static void ice_rm_dflt_leaf_node(struct ice_port_info *pi)
+{
+ struct ice_sched_node *node;
+
+ node = pi->root;
+ while (node) {
+ if (!node->num_children)
+ break;
+ node = node->children[0];
+ }
+ if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
+ u32 teid = le32_to_cpu(node->info.node_teid);
+ int status;
+
+ /* remove the default leaf node */
+ status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
+ if (!status)
+ ice_free_sched_node(pi, node);
+ }
+}
+
+/**
+ * ice_sched_rm_dflt_nodes - free the default nodes in the tree
+ * @pi: port information structure
+ *
+ * This function frees all the nodes except root and TC that were created by
+ * the FW during initialization
+ */
+static void ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
+{
+ struct ice_sched_node *node;
+
+ ice_rm_dflt_leaf_node(pi);
+
+ /* remove the default nodes except TC and root nodes */
+ node = pi->root;
+ while (node) {
+ if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
+ node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
+ node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
+ ice_free_sched_node(pi, node);
+ break;
+ }
+
+ if (!node->num_children)
+ break;
+ node = node->children[0];
+ }
+}
+
+/**
+ * ice_sched_init_port - Initialize scheduler by querying information from FW
+ * @pi: port info structure for the tree to cleanup
+ *
+ * This function is the initial call to query the total number of Tx
+ * scheduler resources and the default topology created by firmware, and to
+ * store that information in the SW DB.
+ */
+int ice_sched_init_port(struct ice_port_info *pi)
+{
+ struct ice_aqc_get_topo_elem *buf;
+ struct ice_hw *hw;
+ u8 num_branches;
+ u16 num_elems;
+ int status;
+ u8 i, j;
+
+ if (!pi)
+ return -EINVAL;
+ hw = pi->hw;
+
+ /* Query the Default Topology from FW */
+ buf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Query default scheduling tree topology */
+ status = ice_aq_get_dflt_topo(hw, pi->lport, buf, ICE_AQ_MAX_BUF_LEN,
+ &num_branches, NULL);
+ if (status)
+ goto err_init_port;
+
+	/* num_branches should be between 1 and 8 */
+ if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
+ ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
+ num_branches);
+ status = -EINVAL;
+ goto err_init_port;
+ }
+
+ /* get the number of elements on the default/first branch */
+ num_elems = le16_to_cpu(buf[0].hdr.num_elems);
+
+	/* num_elems should always be between 1 and 9 */
+ if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
+ ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
+ num_elems);
+ status = -EINVAL;
+ goto err_init_port;
+ }
+
+ /* If the last node is a leaf node then the index of the queue group
+ * layer is two less than the number of elements.
+ */
+ if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
+ ICE_AQC_ELEM_TYPE_LEAF)
+ pi->last_node_teid =
+ le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
+ else
+ pi->last_node_teid =
+ le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
+
+ /* Insert the Tx Sched root node */
+ status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
+ if (status)
+ goto err_init_port;
+
+ /* Parse the default tree and cache the information */
+ for (i = 0; i < num_branches; i++) {
+ num_elems = le16_to_cpu(buf[i].hdr.num_elems);
+
+ /* Skip root element as already inserted */
+ for (j = 1; j < num_elems; j++) {
+ /* update the sw entry point */
+ if (buf[0].generic[j].data.elem_type ==
+ ICE_AQC_ELEM_TYPE_ENTRY_POINT)
+ hw->sw_entry_point_layer = j;
+
+ status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
+ if (status)
+ goto err_init_port;
+ }
+ }
+
+ /* Remove the default nodes. */
+ if (pi->root)
+ ice_sched_rm_dflt_nodes(pi);
+
+ /* initialize the port for handling the scheduler tree */
+ pi->port_state = ICE_SCHED_PORT_STATE_READY;
+ mutex_init(&pi->sched_lock);
+ for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
+ INIT_LIST_HEAD(&pi->rl_prof_list[i]);
+
+err_init_port:
+ if (status && pi->root) {
+ ice_free_sched_node(pi, pi->root);
+ pi->root = NULL;
+ }
+
+ kfree(buf);
+ return status;
+}
+
+/**
+ * ice_sched_query_res_alloc - query the FW for num of logical sched layers
+ * @hw: pointer to the HW struct
+ *
+ * query FW for allocated scheduler resources and store in HW struct
+ */
+int ice_sched_query_res_alloc(struct ice_hw *hw)
+{
+ struct ice_aqc_query_txsched_res_resp *buf;
+ __le16 max_sibl;
+ int status = 0;
+ u16 i;
+
+ if (hw->layer_info)
+ return status;
+
+ buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
+ if (status)
+ goto sched_query_out;
+
+ hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
+ hw->num_tx_sched_phys_layers =
+ le16_to_cpu(buf->sched_props.phys_levels);
+ hw->flattened_layers = buf->sched_props.flattening_bitmap;
+ hw->max_cgds = buf->sched_props.max_pf_cgds;
+
+ /* max sibling group size of current layer refers to the max children
+ * of the below layer node.
+ * layer 1 node max children will be layer 2 max sibling group size
+ * layer 2 node max children will be layer 3 max sibling group size
+ * and so on. This array will be populated from root (index 0) to
+ * qgroup layer 7. Leaf node has no children.
+ */
+ for (i = 0; i < hw->num_tx_sched_layers - 1; i++) {
+ max_sibl = buf->layer_props[i + 1].max_sibl_grp_sz;
+ hw->max_children[i] = le16_to_cpu(max_sibl);
+ }
+
+ hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
+ (hw->num_tx_sched_layers *
+ sizeof(*hw->layer_info)),
+ GFP_KERNEL);
+ if (!hw->layer_info) {
+ status = -ENOMEM;
+ goto sched_query_out;
+ }
+
+sched_query_out:
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return status;
+}
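+
+/* Worked example for the mapping above (sibling group size assumed for
+ * illustration): if the FW reports layer_props[2].max_sibl_grp_sz == 8,
+ * then hw->max_children[1] == 8, i.e. a layer-1 node may have at most
+ * eight layer-2 children, because a layer's sibling group size bounds the
+ * fan-out of its parent layer.
+ */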
+
+/**
+ * ice_sched_get_psm_clk_freq - determine the PSM clock frequency
+ * @hw: pointer to the HW struct
+ *
+ * Determine the PSM clock frequency and store in HW struct
+ */
+void ice_sched_get_psm_clk_freq(struct ice_hw *hw)
+{
+ u32 val, clk_src;
+
+ val = rd32(hw, GLGEN_CLKSTAT_SRC);
+ clk_src = (val & GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M) >>
+ GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S;
+
+#define PSM_CLK_SRC_367_MHZ 0x0
+#define PSM_CLK_SRC_416_MHZ 0x1
+#define PSM_CLK_SRC_446_MHZ 0x2
+#define PSM_CLK_SRC_390_MHZ 0x3
+
+ switch (clk_src) {
+ case PSM_CLK_SRC_367_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_367MHZ_IN_HZ;
+ break;
+ case PSM_CLK_SRC_416_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_416MHZ_IN_HZ;
+ break;
+ case PSM_CLK_SRC_446_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ;
+ break;
+ case PSM_CLK_SRC_390_MHZ:
+ hw->psm_clk_freq = ICE_PSM_CLK_390MHZ_IN_HZ;
+ break;
+ default:
+ ice_debug(hw, ICE_DBG_SCHED, "PSM clk_src unexpected %u\n",
+ clk_src);
+ /* fall back to a safe default */
+ hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ;
+ }
+}
+
+/**
+ * ice_sched_find_node_in_subtree - Find node in part of base node subtree
+ * @hw: pointer to the HW struct
+ * @base: pointer to the base node
+ * @node: pointer to the node to search
+ *
+ * This function checks whether a given node is part of the base node
+ * subtree or not
+ */
+static bool
+ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
+ struct ice_sched_node *node)
+{
+ u8 i;
+
+ for (i = 0; i < base->num_children; i++) {
+ struct ice_sched_node *child = base->children[i];
+
+ if (node == child)
+ return true;
+
+ if (child->tx_sched_layer > node->tx_sched_layer)
+ return false;
+
+		/* this recursion is intentional, and won't go more than
+		 * 8 calls deep
+		 */
+ if (ice_sched_find_node_in_subtree(hw, child, node))
+ return true;
+ }
+ return false;
+}
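+
+/* The early return above relies on all children of a node living on the
+ * same, deeper layer: once a child is already below the target node's
+ * layer, the target cannot sit anywhere under base, so the walk stops.
+ */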
+
+/**
+ * ice_sched_get_free_qgrp - Scan all queue group siblings and find a free node
+ * @pi: port information structure
+ * @vsi_node: pointer to the VSI node
+ * @qgrp_node: first queue group node identified for scanning
+ * @owner: LAN or RDMA
+ *
+ * This function retrieves a free LAN or RDMA queue group node by scanning
+ * qgrp_node and its siblings for the queue group with the fewest number
+ * of queues currently assigned.
+ */
+static struct ice_sched_node *
+ice_sched_get_free_qgrp(struct ice_port_info *pi,
+ struct ice_sched_node *vsi_node,
+ struct ice_sched_node *qgrp_node, u8 owner)
+{
+ struct ice_sched_node *min_qgrp;
+ u8 min_children;
+
+ if (!qgrp_node)
+ return qgrp_node;
+ min_children = qgrp_node->num_children;
+ if (!min_children)
+ return qgrp_node;
+ min_qgrp = qgrp_node;
+	/* scan all queue groups for the node with the fewest children; this
+	 * keeps the queue group nodes equally loaded and active so that
+	 * bandwidth is distributed evenly across all queues
+	 */
+ while (qgrp_node) {
+ /* make sure the qgroup node is part of the VSI subtree */
+ if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
+ if (qgrp_node->num_children < min_children &&
+ qgrp_node->owner == owner) {
+ /* replace the new min queue group node */
+ min_qgrp = qgrp_node;
+ min_children = min_qgrp->num_children;
+				/* break early if it has no children */
+ if (!min_children)
+ break;
+ }
+ qgrp_node = qgrp_node->sibling;
+ }
+ return min_qgrp;
+}
+
+/**
+ * ice_sched_get_free_qparent - Get a free LAN or RDMA queue group node
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: branch number
+ * @owner: LAN or RDMA
+ *
+ * This function retrieves a free LAN or RDMA queue group node
+ */
+struct ice_sched_node *
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u8 owner)
+{
+ struct ice_sched_node *vsi_node, *qgrp_node;
+ struct ice_vsi_ctx *vsi_ctx;
+ u16 max_children;
+ u8 qgrp_layer;
+
+ qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
+ max_children = pi->hw->max_children[qgrp_layer];
+
+ vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+ if (!vsi_ctx)
+ return NULL;
+ vsi_node = vsi_ctx->sched.vsi_node[tc];
+	/* bail out if the VSI node doesn't exist */
+ if (!vsi_node)
+ return NULL;
+
+ /* get the first queue group node from VSI sub-tree */
+ qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
+ while (qgrp_node) {
+ /* make sure the qgroup node is part of the VSI subtree */
+ if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
+ if (qgrp_node->num_children < max_children &&
+ qgrp_node->owner == owner)
+ break;
+ qgrp_node = qgrp_node->sibling;
+ }
+
+ /* Select the best queue group */
+ return ice_sched_get_free_qgrp(pi, vsi_node, qgrp_node, owner);
+}
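+
+/* Illustrative caller sketch (names assumed; the real Tx queue setup path
+ * lives outside this file): a new LAN queue is parented under the node
+ * returned here.
+ *
+ *	struct ice_sched_node *parent;
+ *
+ *	parent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
+ *					    ICE_SCHED_NODE_OWNER_LAN);
+ *	if (!parent)
+ *		return -EINVAL;
+ *	... use parent->info.node_teid as the queue's parent TEID ...
+ */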
+
+/**
+ * ice_sched_get_vsi_node - Get a VSI node based on VSI ID
+ * @pi: pointer to the port information structure
+ * @tc_node: pointer to the TC node
+ * @vsi_handle: software VSI handle
+ *
+ * This function retrieves a VSI node for a given VSI ID from a given
+ * TC branch
+ */
+static struct ice_sched_node *
+ice_sched_get_vsi_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+ u16 vsi_handle)
+{
+ struct ice_sched_node *node;
+ u8 vsi_layer;
+
+ vsi_layer = ice_sched_get_vsi_layer(pi->hw);
+ node = ice_sched_get_first_node(pi, tc_node, vsi_layer);
+
+ /* Check whether it already exists */
+ while (node) {
+ if (node->vsi_handle == vsi_handle)
+ return node;
+ node = node->sibling;
+ }
+
+ return node;
+}
+
+/**
+ * ice_sched_get_agg_node - Get an aggregator node based on aggregator ID
+ * @pi: pointer to the port information structure
+ * @tc_node: pointer to the TC node
+ * @agg_id: aggregator ID
+ *
+ * This function retrieves an aggregator node for a given aggregator ID from
+ * a given TC branch
+ */
+static struct ice_sched_node *
+ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+ u32 agg_id)
+{
+ struct ice_sched_node *node;
+ struct ice_hw *hw = pi->hw;
+ u8 agg_layer;
+
+ if (!hw)
+ return NULL;
+ agg_layer = ice_sched_get_agg_layer(hw);
+ node = ice_sched_get_first_node(pi, tc_node, agg_layer);
+
+ /* Check whether it already exists */
+ while (node) {
+ if (node->agg_id == agg_id)
+ return node;
+ node = node->sibling;
+ }
+
+ return node;
+}
+
+/**
+ * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
+ * @hw: pointer to the HW struct
+ * @num_qs: number of queues
+ * @num_nodes: num nodes array
+ *
+ * This function calculates the number of VSI child nodes based on the
+ * number of queues.
+ */
+static void
+ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
+{
+ u16 num = num_qs;
+ u8 i, qgl, vsil;
+
+ qgl = ice_sched_get_qgrp_layer(hw);
+ vsil = ice_sched_get_vsi_layer(hw);
+
+ /* calculate num nodes from queue group to VSI layer */
+ for (i = qgl; i > vsil; i--) {
+ /* round to the next integer if there is a remainder */
+ num = DIV_ROUND_UP(num, hw->max_children[i]);
+
+ /* need at least one node */
+ num_nodes[i] = num ? num : 1;
+ }
+}
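+
+/* Worked example (fan-out values assumed for illustration): for
+ * num_qs == 130 with hw->max_children[qgl] == 8, the qgroup layer needs
+ * DIV_ROUND_UP(130, 8) == 17 nodes; if the layer above also allows eight
+ * children per node, it needs DIV_ROUND_UP(17, 8) == 3 nodes, and so on up
+ * to (but not including) the VSI layer.
+ */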
+
+/**
+ * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_node: pointer to the TC node
+ * @num_nodes: pointer to the num nodes that needs to be added per layer
+ * @owner: node owner (LAN or RDMA)
+ *
+ * This function adds the VSI child nodes to tree. It gets called for
+ * LAN and RDMA separately.
+ */
+static int
+ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
+ struct ice_sched_node *tc_node, u16 *num_nodes,
+ u8 owner)
+{
+ struct ice_sched_node *parent, *node;
+ struct ice_hw *hw = pi->hw;
+ u32 first_node_teid;
+ u16 num_added = 0;
+ u8 i, qgl, vsil;
+ int status;
+
+ qgl = ice_sched_get_qgrp_layer(hw);
+ vsil = ice_sched_get_vsi_layer(hw);
+ parent = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+ for (i = vsil + 1; i <= qgl; i++) {
+ if (!parent)
+ return -EIO;
+
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+ num_nodes[i],
+ &first_node_teid,
+ &num_added);
+ if (status || num_nodes[i] != num_added)
+ return -EIO;
+
+ /* The newly added node can be a new parent for the next
+ * layer nodes
+ */
+ if (num_added) {
+ parent = ice_sched_find_node_by_teid(tc_node,
+ first_node_teid);
+ node = parent;
+ while (node) {
+ node->owner = owner;
+ node = node->sibling;
+ }
+ } else {
+ parent = parent->children[0];
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
+ * @pi: pointer to the port info structure
+ * @tc_node: pointer to TC node
+ * @num_nodes: pointer to num nodes array
+ *
+ * This function calculates the number of supported nodes needed to add this
+ * VSI into Tx tree including the VSI, parent and intermediate nodes in below
+ * layers
+ */
+static void
+ice_sched_calc_vsi_support_nodes(struct ice_port_info *pi,
+ struct ice_sched_node *tc_node, u16 *num_nodes)
+{
+ struct ice_sched_node *node;
+ u8 vsil;
+ int i;
+
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+ for (i = vsil; i >= pi->hw->sw_entry_point_layer; i--)
+		/* The VSI layer always needs a node; if the TC has no
+		 * children yet, every intermediate layer needs one too
+		 */
+ if (!tc_node->num_children || i == vsil) {
+ num_nodes[i]++;
+ } else {
+			/* If the intermediate nodes have reached their max
+			 * children, then add a new one.
+			 */
+ node = ice_sched_get_first_node(pi, tc_node, (u8)i);
+ /* scan all the siblings */
+ while (node) {
+ if (node->num_children < pi->hw->max_children[i])
+ break;
+ node = node->sibling;
+ }
+
+			/* the tree has an intermediate node with room for
+			 * this new VSI, so no new nodes need to be counted
+			 * for the layers above it
+			 */
+ if (node)
+ break;
+ /* all the nodes are full, allocate a new one */
+ num_nodes[i]++;
+ }
+}
+
+/**
+ * ice_sched_add_vsi_support_nodes - add VSI supported nodes into Tx tree
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_node: pointer to TC node
+ * @num_nodes: pointer to num nodes array
+ *
+ * This function adds the VSI supported nodes into Tx tree including the
+ * VSI, its parent and intermediate nodes in below layers
+ */
+static int
+ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
+ struct ice_sched_node *tc_node, u16 *num_nodes)
+{
+ struct ice_sched_node *parent = tc_node;
+ u32 first_node_teid;
+ u16 num_added = 0;
+ u8 i, vsil;
+ int status;
+
+ if (!pi)
+ return -EINVAL;
+
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+ for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
+ i, num_nodes[i],
+ &first_node_teid,
+ &num_added);
+ if (status || num_nodes[i] != num_added)
+ return -EIO;
+
+ /* The newly added node can be a new parent for the next
+ * layer nodes
+ */
+ if (num_added)
+ parent = ice_sched_find_node_by_teid(tc_node,
+ first_node_teid);
+ else
+ parent = parent->children[0];
+
+ if (!parent)
+ return -EIO;
+
+ if (i == vsil)
+ parent->vsi_handle = vsi_handle;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_sched_add_vsi_to_topo - add a new VSI into tree
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ *
+ * This function adds a new VSI into scheduler tree
+ */
+static int
+ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
+{
+ u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ struct ice_sched_node *tc_node;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return -EINVAL;
+
+ /* calculate number of supported nodes needed for this VSI */
+ ice_sched_calc_vsi_support_nodes(pi, tc_node, num_nodes);
+
+ /* add VSI supported nodes to TC subtree */
+ return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
+ num_nodes);
+}
+
+/**
+ * ice_sched_update_vsi_child_nodes - update VSI child nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @new_numqs: new number of max queues
+ * @owner: owner of this subtree
+ *
+ * This function updates the VSI child nodes based on the number of queues
+ */
+static int
+ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
+ u8 tc, u16 new_numqs, u8 owner)
+{
+ u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ struct ice_sched_node *vsi_node;
+ struct ice_sched_node *tc_node;
+ struct ice_vsi_ctx *vsi_ctx;
+ struct ice_hw *hw = pi->hw;
+ u16 prev_numqs;
+ int status = 0;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return -EIO;
+
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+ if (!vsi_node)
+ return -EIO;
+
+ vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+ if (!vsi_ctx)
+ return -EINVAL;
+
+ if (owner == ICE_SCHED_NODE_OWNER_LAN)
+ prev_numqs = vsi_ctx->sched.max_lanq[tc];
+ else
+ prev_numqs = vsi_ctx->sched.max_rdmaq[tc];
+	/* nothing to do if the number of queues is unchanged or lower */
+ if (new_numqs <= prev_numqs)
+ return status;
+ if (owner == ICE_SCHED_NODE_OWNER_LAN) {
+ status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
+ if (status)
+ return status;
+ } else {
+ status = ice_alloc_rdma_q_ctx(hw, vsi_handle, tc, new_numqs);
+ if (status)
+ return status;
+ }
+
+ if (new_numqs)
+ ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
+ /* Keep the max number of queue configuration all the time. Update the
+ * tree only if number of queues > previous number of queues. This may
+ * leave some extra nodes in the tree if number of queues < previous
+ * number but that wouldn't harm anything. Removing those extra nodes
+ * may complicate the code if those nodes are part of SRL or
+ * individually rate limited.
+ */
+ status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
+ new_num_nodes, owner);
+ if (status)
+ return status;
+ if (owner == ICE_SCHED_NODE_OWNER_LAN)
+ vsi_ctx->sched.max_lanq[tc] = new_numqs;
+ else
+ vsi_ctx->sched.max_rdmaq[tc] = new_numqs;
+
+ return 0;
+}
+
+/**
+ * ice_sched_cfg_vsi - configure the new/existing VSI
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @maxqs: max number of queues
+ * @owner: LAN or RDMA
+ * @enable: TC enabled or disabled
+ *
+ * This function adds/updates VSI nodes based on the number of queues. If TC is
+ * enabled and VSI is in suspended state then resume the VSI back. If TC is
+ * disabled then suspend the VSI if it is not already.
+ */
+int
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
+ u8 owner, bool enable)
+{
+ struct ice_sched_node *vsi_node, *tc_node;
+ struct ice_vsi_ctx *vsi_ctx;
+ struct ice_hw *hw = pi->hw;
+ int status = 0;
+
+ ice_debug(pi->hw, ICE_DBG_SCHED, "add/config VSI %d\n", vsi_handle);
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return -EINVAL;
+ vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+ if (!vsi_ctx)
+ return -EINVAL;
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+
+ /* suspend the VSI if TC is not enabled */
+ if (!enable) {
+ if (vsi_node && vsi_node->in_use) {
+ u32 teid = le32_to_cpu(vsi_node->info.node_teid);
+
+ status = ice_sched_suspend_resume_elems(hw, 1, &teid,
+ true);
+ if (!status)
+ vsi_node->in_use = false;
+ }
+ return status;
+ }
+
+ /* TC is enabled, if it is a new VSI then add it to the tree */
+ if (!vsi_node) {
+ status = ice_sched_add_vsi_to_topo(pi, vsi_handle, tc);
+ if (status)
+ return status;
+
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+ if (!vsi_node)
+ return -EIO;
+
+ vsi_ctx->sched.vsi_node[tc] = vsi_node;
+ vsi_node->in_use = true;
+		/* invalidate the max queues whenever the VSI is added to the
+		 * scheduler tree for the first time (boot or after reset),
+		 * since the child nodes must be recreated in these cases
+		 */
+ vsi_ctx->sched.max_lanq[tc] = 0;
+ vsi_ctx->sched.max_rdmaq[tc] = 0;
+ }
+
+ /* update the VSI child nodes */
+ status = ice_sched_update_vsi_child_nodes(pi, vsi_handle, tc, maxqs,
+ owner);
+ if (status)
+ return status;
+
+ /* TC is enabled, resume the VSI if it is in the suspend state */
+ if (!vsi_node->in_use) {
+ u32 teid = le32_to_cpu(vsi_node->info.node_teid);
+
+ status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
+ if (!status)
+ vsi_node->in_use = true;
+ }
+
+ return status;
+}
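+
+/* Illustrative caller sketch (values assumed): enabling TC 0 for a VSI
+ * with up to 16 LAN queues adds the VSI node if needed, grows its child
+ * nodes, and resumes it if it was suspended:
+ *
+ *	int err = ice_sched_cfg_vsi(pi, vsi_handle, 0, 16,
+ *				    ICE_SCHED_NODE_OWNER_LAN, true);
+ *
+ * Passing enable == false instead suspends the VSI node for that TC.
+ */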
+
+/**
+ * ice_sched_rm_agg_vsi_info - remove aggregator related VSI info entry
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function removes a single aggregator VSI info entry from the
+ * aggregator list.
+ */
+static void ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
+{
+ struct ice_sched_agg_info *agg_info;
+ struct ice_sched_agg_info *atmp;
+
+ list_for_each_entry_safe(agg_info, atmp, &pi->hw->agg_list,
+ list_entry) {
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_vsi_info *vtmp;
+
+ list_for_each_entry_safe(agg_vsi_info, vtmp,
+ &agg_info->agg_vsi_list, list_entry)
+ if (agg_vsi_info->vsi_handle == vsi_handle) {
+ list_del(&agg_vsi_info->list_entry);
+ devm_kfree(ice_hw_to_dev(pi->hw),
+ agg_vsi_info);
+ return;
+ }
+ }
+}
+
+/**
+ * ice_sched_is_leaf_node_present - check for a leaf node in the sub-tree
+ * @node: pointer to the sub-tree node
+ *
+ * This function checks for a leaf node presence in a given sub-tree node.
+ */
+static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
+{
+ u8 i;
+
+ for (i = 0; i < node->num_children; i++)
+ if (ice_sched_is_leaf_node_present(node->children[i]))
+ return true;
+ /* check for a leaf node */
+ return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF);
+}
+
+/**
+ * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @owner: LAN or RDMA
+ *
+ * This function removes the VSI and its LAN or RDMA children nodes from the
+ * scheduler tree.
+ */
+static int
+ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
+{
+ struct ice_vsi_ctx *vsi_ctx;
+ int status = -EINVAL;
+ u8 i;
+
+ ice_debug(pi->hw, ICE_DBG_SCHED, "removing VSI %d\n", vsi_handle);
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ return status;
+ mutex_lock(&pi->sched_lock);
+ vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+ if (!vsi_ctx)
+ goto exit_sched_rm_vsi_cfg;
+
+ ice_for_each_traffic_class(i) {
+ struct ice_sched_node *vsi_node, *tc_node;
+ u8 j = 0;
+
+ tc_node = ice_sched_get_tc_node(pi, i);
+ if (!tc_node)
+ continue;
+
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+ if (!vsi_node)
+ continue;
+
+ if (ice_sched_is_leaf_node_present(vsi_node)) {
+ ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", i);
+ status = -EBUSY;
+ goto exit_sched_rm_vsi_cfg;
+ }
+ while (j < vsi_node->num_children) {
+ if (vsi_node->children[j]->owner == owner) {
+ ice_free_sched_node(pi, vsi_node->children[j]);
+
+ /* reset the counter again since the num
+ * children will be updated after node removal
+ */
+ j = 0;
+ } else {
+ j++;
+ }
+ }
+ /* remove the VSI if it has no children */
+ if (!vsi_node->num_children) {
+ ice_free_sched_node(pi, vsi_node);
+ vsi_ctx->sched.vsi_node[i] = NULL;
+
+ /* clean up aggregator related VSI info if any */
+ ice_sched_rm_agg_vsi_info(pi, vsi_handle);
+ }
+ if (owner == ICE_SCHED_NODE_OWNER_LAN)
+ vsi_ctx->sched.max_lanq[i] = 0;
+ else
+ vsi_ctx->sched.max_rdmaq[i] = 0;
+ }
+ status = 0;
+
+exit_sched_rm_vsi_cfg:
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_rm_vsi_lan_cfg - remove VSI and its LAN children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function clears the VSI and its LAN children nodes from scheduler tree
+ * for all TCs.
+ */
+int ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
+{
+ return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
+}
+
+/**
+ * ice_rm_vsi_rdma_cfg - remove VSI and its RDMA children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function clears the VSI and its RDMA children nodes from scheduler tree
+ * for all TCs.
+ */
+int ice_rm_vsi_rdma_cfg(struct ice_port_info *pi, u16 vsi_handle)
+{
+ return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_RDMA);
+}
+
+/**
+ * ice_get_agg_info - get the aggregator ID
+ * @hw: pointer to the hardware structure
+ * @agg_id: aggregator ID
+ *
+ * This function validates the aggregator ID. It returns the aggregator info
+ * entry if the ID is present in the list, otherwise it returns NULL.
+ */
+static struct ice_sched_agg_info *
+ice_get_agg_info(struct ice_hw *hw, u32 agg_id)
+{
+ struct ice_sched_agg_info *agg_info;
+
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry)
+ if (agg_info->agg_id == agg_id)
+ return agg_info;
+
+ return NULL;
+}
+
+/**
+ * ice_sched_get_free_vsi_parent - Find a free parent node in aggregator subtree
+ * @hw: pointer to the HW struct
+ * @node: pointer to a child node
+ * @num_nodes: num nodes count array
+ *
+ * This function walks through the aggregator subtree to find a free parent
+ * node
+ */
+static struct ice_sched_node *
+ice_sched_get_free_vsi_parent(struct ice_hw *hw, struct ice_sched_node *node,
+ u16 *num_nodes)
+{
+ u8 l = node->tx_sched_layer;
+ u8 vsil, i;
+
+ vsil = ice_sched_get_vsi_layer(hw);
+
+ /* Is it VSI parent layer ? */
+ if (l == vsil - 1)
+ return (node->num_children < hw->max_children[l]) ? node : NULL;
+
+ /* We have intermediate nodes. Let's walk through the subtree. If the
+ * intermediate node has space to add a new node then clear the count
+ */
+ if (node->num_children < hw->max_children[l])
+ num_nodes[l] = 0;
+	/* The recursive call below is intentional and won't go more than
+	 * 2 or 3 levels deep.
+	 */
+ for (i = 0; i < node->num_children; i++) {
+ struct ice_sched_node *parent;
+
+ parent = ice_sched_get_free_vsi_parent(hw, node->children[i],
+ num_nodes);
+ if (parent)
+ return parent;
+ }
+
+ return NULL;
+}
+
+/**
+ * ice_sched_update_parent - update the new parent in SW DB
+ * @new_parent: pointer to a new parent node
+ * @node: pointer to a child node
+ *
+ * This function removes the child from the old parent and adds it to a new
+ * parent
+ */
+static void
+ice_sched_update_parent(struct ice_sched_node *new_parent,
+ struct ice_sched_node *node)
+{
+ struct ice_sched_node *old_parent;
+ u8 i, j;
+
+ old_parent = node->parent;
+
+ /* update the old parent children */
+ for (i = 0; i < old_parent->num_children; i++)
+ if (old_parent->children[i] == node) {
+ for (j = i + 1; j < old_parent->num_children; j++)
+ old_parent->children[j - 1] =
+ old_parent->children[j];
+ old_parent->num_children--;
+ break;
+ }
+
+ /* now move the node to a new parent */
+ new_parent->children[new_parent->num_children++] = node;
+ node->parent = new_parent;
+ node->info.parent_teid = new_parent->info.node_teid;
+}
+
+/**
+ * ice_sched_move_nodes - move child nodes to a given parent
+ * @pi: port information structure
+ * @parent: pointer to parent node
+ * @num_items: number of child nodes to be moved
+ * @list: pointer to child node teids
+ *
+ * This function moves the child nodes to a given parent.
+ */
+static int
+ice_sched_move_nodes(struct ice_port_info *pi, struct ice_sched_node *parent,
+ u16 num_items, u32 *list)
+{
+ struct ice_aqc_move_elem *buf;
+ struct ice_sched_node *node;
+ u16 i, grps_movd = 0;
+ struct ice_hw *hw;
+ int status = 0;
+ u16 buf_len;
+
+ hw = pi->hw;
+
+ if (!parent || !num_items)
+ return -EINVAL;
+
+	/* does the parent have enough space? */
+ if (parent->num_children + num_items >
+ hw->max_children[parent->tx_sched_layer])
+ return -ENOSPC;
+
+ buf_len = struct_size(buf, teid, 1);
+ buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ for (i = 0; i < num_items; i++) {
+ node = ice_sched_find_node_by_teid(pi->root, list[i]);
+ if (!node) {
+ status = -EINVAL;
+ goto move_err_exit;
+ }
+
+ buf->hdr.src_parent_teid = node->info.parent_teid;
+ buf->hdr.dest_parent_teid = parent->info.node_teid;
+ buf->teid[0] = node->info.node_teid;
+ buf->hdr.num_elems = cpu_to_le16(1);
+ status = ice_aq_move_sched_elems(hw, 1, buf, buf_len,
+ &grps_movd, NULL);
+		if (status || grps_movd != 1) {
+ status = -EIO;
+ goto move_err_exit;
+ }
+
+ /* update the SW DB */
+ ice_sched_update_parent(parent, node);
+ }
+
+move_err_exit:
+ kfree(buf);
+ return status;
+}
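+
+/* Note on the loop above: although the move-elements command can carry
+ * several TEIDs, this helper sizes the buffer for a single element via
+ * struct_size(buf, teid, 1) and issues one AQ command per node, reusing
+ * the same buffer; the SW DB is only updated once the HW move succeeds.
+ */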
+
+/**
+ * ice_sched_move_vsi_to_agg - move VSI to aggregator node
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function moves a VSI to an aggregator node or its subtree.
+ * Intermediate nodes may be created if required.
+ */
+static int
+ice_sched_move_vsi_to_agg(struct ice_port_info *pi, u16 vsi_handle, u32 agg_id,
+ u8 tc)
+{
+ struct ice_sched_node *vsi_node, *agg_node, *tc_node, *parent;
+ u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ u32 first_node_teid, vsi_teid;
+ u16 num_nodes_added;
+ u8 aggl, vsil, i;
+ int status;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return -EIO;
+
+ agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+ if (!agg_node)
+ return -ENOENT;
+
+ vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+ if (!vsi_node)
+ return -ENOENT;
+
+ /* Is this VSI already part of given aggregator? */
+ if (ice_sched_find_node_in_subtree(pi->hw, agg_node, vsi_node))
+ return 0;
+
+ aggl = ice_sched_get_agg_layer(pi->hw);
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+
+ /* set intermediate node count to 1 between aggregator and VSI layers */
+ for (i = aggl + 1; i < vsil; i++)
+ num_nodes[i] = 1;
+
+ /* Check if the aggregator subtree has any free node to add the VSI */
+ for (i = 0; i < agg_node->num_children; i++) {
+ parent = ice_sched_get_free_vsi_parent(pi->hw,
+ agg_node->children[i],
+ num_nodes);
+ if (parent)
+ goto move_nodes;
+ }
+
+ /* add new nodes */
+ parent = agg_node;
+ for (i = aggl + 1; i < vsil; i++) {
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+ num_nodes[i],
+ &first_node_teid,
+ &num_nodes_added);
+ if (status || num_nodes[i] != num_nodes_added)
+ return -EIO;
+
+ /* The newly added node can be a new parent for the next
+ * layer nodes
+ */
+ if (num_nodes_added)
+ parent = ice_sched_find_node_by_teid(tc_node,
+ first_node_teid);
+ else
+ parent = parent->children[0];
+
+ if (!parent)
+ return -EIO;
+ }
+
+move_nodes:
+ vsi_teid = le32_to_cpu(vsi_node->info.node_teid);
+ return ice_sched_move_nodes(pi, parent, 1, &vsi_teid);
+}
+
+/**
+ * ice_move_all_vsi_to_dflt_agg - move all VSI(s) to default aggregator
+ * @pi: port information structure
+ * @agg_info: aggregator info
+ * @tc: traffic class number
+ * @rm_vsi_info: remove the VSI info entries if true
+ *
+ * This function moves all the VSI(s) to the default aggregator and deletes
+ * the aggregator VSI info based on the passed in boolean parameter
+ * rm_vsi_info. The caller must hold the scheduler lock.
+ */
+static int
+ice_move_all_vsi_to_dflt_agg(struct ice_port_info *pi,
+ struct ice_sched_agg_info *agg_info, u8 tc,
+ bool rm_vsi_info)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_vsi_info *tmp;
+ int status = 0;
+
+ list_for_each_entry_safe(agg_vsi_info, tmp, &agg_info->agg_vsi_list,
+ list_entry) {
+ u16 vsi_handle = agg_vsi_info->vsi_handle;
+
+ /* Move VSI to default aggregator */
+ if (!ice_is_tc_ena(agg_vsi_info->tc_bitmap[0], tc))
+ continue;
+
+ status = ice_sched_move_vsi_to_agg(pi, vsi_handle,
+ ICE_DFLT_AGG_ID, tc);
+ if (status)
+ break;
+
+ clear_bit(tc, agg_vsi_info->tc_bitmap);
+ if (rm_vsi_info && !agg_vsi_info->tc_bitmap[0]) {
+ list_del(&agg_vsi_info->list_entry);
+ devm_kfree(ice_hw_to_dev(pi->hw), agg_vsi_info);
+ }
+ }
+
+ return status;
+}
+
+/**
+ * ice_sched_is_agg_inuse - check whether the aggregator is in use or not
+ * @pi: port information structure
+ * @node: node pointer
+ *
+ * This function checks whether the aggregator has any VSI attached to it.
+ */
+static bool
+ice_sched_is_agg_inuse(struct ice_port_info *pi, struct ice_sched_node *node)
+{
+ u8 vsil, i;
+
+ vsil = ice_sched_get_vsi_layer(pi->hw);
+ if (node->tx_sched_layer < vsil - 1) {
+ for (i = 0; i < node->num_children; i++)
+ if (ice_sched_is_agg_inuse(pi, node->children[i]))
+ return true;
+ return false;
+ } else {
+ return node->num_children ? true : false;
+ }
+}
+
+/**
+ * ice_sched_rm_agg_cfg - remove the aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function removes the aggregator node and intermediate nodes if any
+ * from the given TC
+ */
+static int
+ice_sched_rm_agg_cfg(struct ice_port_info *pi, u32 agg_id, u8 tc)
+{
+ struct ice_sched_node *tc_node, *agg_node;
+ struct ice_hw *hw = pi->hw;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return -EIO;
+
+ agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+ if (!agg_node)
+ return -ENOENT;
+
+ /* Can't remove the aggregator node if it has children */
+ if (ice_sched_is_agg_inuse(pi, agg_node))
+ return -EBUSY;
+
+	/* walk up and remove the whole subtree while the aggregator node is
+	 * its parent's only child
+	 */
+ while (agg_node->tx_sched_layer > hw->sw_entry_point_layer) {
+ struct ice_sched_node *parent = agg_node->parent;
+
+ if (!parent)
+ return -EIO;
+
+ if (parent->num_children > 1)
+ break;
+
+ agg_node = parent;
+ }
+
+ ice_free_sched_node(pi, agg_node);
+ return 0;
+}
+
+/**
+ * ice_rm_agg_cfg_tc - remove aggregator configuration for TC
+ * @pi: port information structure
+ * @agg_info: aggregator info
+ * @tc: TC number
+ * @rm_vsi_info: remove the VSI info entries if true
+ *
+ * This function removes aggregator reference to VSI of given TC. It removes
+ * the aggregator configuration completely for requested TC. The caller needs
+ * to hold the scheduler lock.
+ */
+static int
+ice_rm_agg_cfg_tc(struct ice_port_info *pi, struct ice_sched_agg_info *agg_info,
+ u8 tc, bool rm_vsi_info)
+{
+ int status = 0;
+
+ /* If nothing to remove - return success */
+ if (!ice_is_tc_ena(agg_info->tc_bitmap[0], tc))
+ goto exit_rm_agg_cfg_tc;
+
+ status = ice_move_all_vsi_to_dflt_agg(pi, agg_info, tc, rm_vsi_info);
+ if (status)
+ goto exit_rm_agg_cfg_tc;
+
+ /* Delete aggregator node(s) */
+ status = ice_sched_rm_agg_cfg(pi, agg_info->agg_id, tc);
+ if (status)
+ goto exit_rm_agg_cfg_tc;
+
+ clear_bit(tc, agg_info->tc_bitmap);
+exit_rm_agg_cfg_tc:
+ return status;
+}
+
+/**
+ * ice_save_agg_tc_bitmap - save aggregator TC bitmap
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc_bitmap: 8 bits TC bitmap
+ *
+ * Save aggregator TC bitmap. This function needs to be called with scheduler
+ * lock held.
+ */
+static int
+ice_save_agg_tc_bitmap(struct ice_port_info *pi, u32 agg_id,
+ unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_info *agg_info;
+
+ agg_info = ice_get_agg_info(pi->hw, agg_id);
+ if (!agg_info)
+ return -EINVAL;
+ bitmap_copy(agg_info->replay_tc_bitmap, tc_bitmap,
+ ICE_MAX_TRAFFIC_CLASS);
+ return 0;
+}
+
+/**
+ * ice_sched_add_agg_cfg - create an aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function creates an aggregator node and intermediate nodes if required
+ * for the given TC
+ */
+static int
+ice_sched_add_agg_cfg(struct ice_port_info *pi, u32 agg_id, u8 tc)
+{
+ struct ice_sched_node *parent, *agg_node, *tc_node;
+ u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+ struct ice_hw *hw = pi->hw;
+ u32 first_node_teid;
+ u16 num_nodes_added;
+ int status = 0;
+ u8 i, aggl;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (!tc_node)
+ return -EIO;
+
+ agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+ /* Does Agg node already exist ? */
+ if (agg_node)
+ return status;
+
+ aggl = ice_sched_get_agg_layer(hw);
+
+ /* need one node in Agg layer */
+ num_nodes[aggl] = 1;
+
+ /* Check whether the intermediate nodes have space to add the
+ * new aggregator. If they are full, then SW needs to allocate a new
+ * intermediate node on those layers
+ */
+ for (i = hw->sw_entry_point_layer; i < aggl; i++) {
+ parent = ice_sched_get_first_node(pi, tc_node, i);
+
+ /* scan all the siblings */
+ while (parent) {
+ if (parent->num_children < hw->max_children[i])
+ break;
+ parent = parent->sibling;
+ }
+
+ /* all the nodes are full, reserve one for this layer */
+ if (!parent)
+ num_nodes[i]++;
+ }
+
+ /* add the aggregator node */
+ parent = tc_node;
+ for (i = hw->sw_entry_point_layer; i <= aggl; i++) {
+ if (!parent)
+ return -EIO;
+
+ status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+ num_nodes[i],
+ &first_node_teid,
+ &num_nodes_added);
+ if (status || num_nodes[i] != num_nodes_added)
+ return -EIO;
+
+ /* The newly added node can be a new parent for the next
+ * layer nodes
+ */
+ if (num_nodes_added) {
+ parent = ice_sched_find_node_by_teid(tc_node,
+ first_node_teid);
+ /* register aggregator ID with the aggregator node */
+ if (parent && i == aggl)
+ parent->agg_id = agg_id;
+ } else {
+ parent = parent->children[0];
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_sched_cfg_agg - configure aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @agg_type: aggregator type (queue, VSI, or aggregator group)
+ * @tc_bitmap: 8 bits TC bitmap
+ *
+ * It registers a unique aggregator node into scheduler services. It
+ * allows a user to register with a unique ID to track its resources.
+ * The aggregator type determines if this is a queue group, VSI group
+ * or aggregator group. It then creates the aggregator node(s) for requested
+ * TC(s) or removes an existing aggregator node including its configuration
+ * if indicated via tc_bitmap. Call ice_rm_agg_cfg to release aggregator
+ * resources and remove aggregator ID.
+ * This function needs to be called with scheduler lock held.
+ */
+static int
+ice_sched_cfg_agg(struct ice_port_info *pi, u32 agg_id,
+ enum ice_agg_type agg_type, unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_info *agg_info;
+ struct ice_hw *hw = pi->hw;
+ int status = 0;
+ u8 tc;
+
+ agg_info = ice_get_agg_info(hw, agg_id);
+ if (!agg_info) {
+ /* Create new entry for new aggregator ID */
+ agg_info = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*agg_info),
+ GFP_KERNEL);
+ if (!agg_info)
+ return -ENOMEM;
+
+ agg_info->agg_id = agg_id;
+ agg_info->agg_type = agg_type;
+ agg_info->tc_bitmap[0] = 0;
+
+ /* Initialize the aggregator VSI list head */
+ INIT_LIST_HEAD(&agg_info->agg_vsi_list);
+
+ /* Add new entry in aggregator list */
+ list_add(&agg_info->list_entry, &hw->agg_list);
+ }
+ /* Create aggregator node(s) for requested TC(s) */
+ ice_for_each_traffic_class(tc) {
+ if (!ice_is_tc_ena(*tc_bitmap, tc)) {
+ /* Delete the aggregator cfg for this TC if previously configured */
+ status = ice_rm_agg_cfg_tc(pi, agg_info, tc, false);
+ if (status)
+ break;
+ continue;
+ }
+
+ /* Check if aggregator node for TC already exists */
+ if (ice_is_tc_ena(agg_info->tc_bitmap[0], tc))
+ continue;
+
+ /* Create new aggregator node for TC */
+ status = ice_sched_add_agg_cfg(pi, agg_id, tc);
+ if (status)
+ break;
+
+ /* Save aggregator node's TC information */
+ set_bit(tc, agg_info->tc_bitmap);
+ }
+
+ return status;
+}
+
+/**
+ * ice_cfg_agg - config aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @agg_type: aggregator type (queue, VSI, or aggregator group)
+ * @tc_bitmap: 8 bits TC bitmap
+ *
+ * This function configures aggregator node(s).
+ */
+int
+ice_cfg_agg(struct ice_port_info *pi, u32 agg_id, enum ice_agg_type agg_type,
+ u8 tc_bitmap)
+{
+ unsigned long bitmap = tc_bitmap;
+ int status;
+
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_cfg_agg(pi, agg_id, agg_type, &bitmap);
+ if (!status)
+ status = ice_save_agg_tc_bitmap(pi, agg_id, &bitmap);
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
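+
+/* Usage sketch (editorial illustration, not part of the driver): a caller
+ * that wants a dedicated aggregator for TCs 0 and 1 could configure it as
+ * below. The aggregator ID of 100 is an arbitrary example value.
+ *
+ *	int err = ice_cfg_agg(pi, 100, ICE_AGG_TYPE_AGG, BIT(0) | BIT(1));
+ *	if (err)
+ *		dev_err(ice_hw_to_dev(pi->hw), "agg cfg failed: %d\n", err);
+ *
+ * Calling ice_cfg_agg() again with a bit cleared in tc_bitmap removes the
+ * aggregator configuration for that TC.
+ */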
+
+/**
+ * ice_get_agg_vsi_info - get the aggregator VSI info entry
+ * @agg_info: aggregator info
+ * @vsi_handle: software VSI handle
+ *
+ * The function returns aggregator VSI info based on VSI handle. This function
+ * needs to be called with scheduler lock held.
+ */
+static struct ice_sched_agg_vsi_info *
+ice_get_agg_vsi_info(struct ice_sched_agg_info *agg_info, u16 vsi_handle)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+ list_for_each_entry(agg_vsi_info, &agg_info->agg_vsi_list, list_entry)
+ if (agg_vsi_info->vsi_handle == vsi_handle)
+ return agg_vsi_info;
+
+ return NULL;
+}
+
+/**
+ * ice_get_vsi_agg_info - get the aggregator info of VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ *
+ * The function returns the aggregator info of the VSI represented by
+ * vsi_handle; such a VSI has an aggregator other than the default one. This
+ * function needs to be called with scheduler lock held.
+ */
+static struct ice_sched_agg_info *
+ice_get_vsi_agg_info(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_sched_agg_info *agg_info;
+
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry) {
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (agg_vsi_info)
+ return agg_info;
+ }
+ return NULL;
+}
+
+/**
+ * ice_save_agg_vsi_tc_bitmap - save aggregator VSI TC bitmap
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * Save VSI to aggregator TC bitmap. This function needs to be called with
+ * the scheduler lock held.
+ */
+static int
+ice_save_agg_vsi_tc_bitmap(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+ unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_sched_agg_info *agg_info;
+
+ agg_info = ice_get_agg_info(pi->hw, agg_id);
+ if (!agg_info)
+ return -EINVAL;
+ /* check if entry already exists */
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (!agg_vsi_info)
+ return -EINVAL;
+ bitmap_copy(agg_vsi_info->replay_tc_bitmap, tc_bitmap,
+ ICE_MAX_TRAFFIC_CLASS);
+ return 0;
+}
+
+/**
+ * ice_sched_assoc_vsi_to_agg - associate/move VSI to new/default aggregator
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * This function moves VSI to a new or default aggregator node. If VSI is
+ * already associated with the aggregator node then no operation is performed on
+ * the tree. This function needs to be called with scheduler lock held.
+ */
+static int
+ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
+ u16 vsi_handle, unsigned long *tc_bitmap)
+{
+ struct ice_sched_agg_vsi_info *agg_vsi_info, *iter, *old_agg_vsi_info = NULL;
+ struct ice_sched_agg_info *agg_info, *old_agg_info;
+ struct ice_hw *hw = pi->hw;
+ int status = 0;
+ u8 tc;
+
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ return -EINVAL;
+ agg_info = ice_get_agg_info(hw, agg_id);
+ if (!agg_info)
+ return -EINVAL;
+ /* If the VSI is already part of another aggregator then update
+ * its VSI info list
+ */
+ old_agg_info = ice_get_vsi_agg_info(hw, vsi_handle);
+ if (old_agg_info && old_agg_info != agg_info) {
+ struct ice_sched_agg_vsi_info *vtmp;
+
+ list_for_each_entry_safe(iter, vtmp,
+ &old_agg_info->agg_vsi_list,
+ list_entry)
+ if (iter->vsi_handle == vsi_handle) {
+ old_agg_vsi_info = iter;
+ break;
+ }
+ }
+
+ /* check if entry already exists */
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (!agg_vsi_info) {
+ /* Create new entry for VSI under aggregator list */
+ agg_vsi_info = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*agg_vsi_info), GFP_KERNEL);
+ if (!agg_vsi_info)
+ return -ENOMEM;
+
+ /* add VSI ID into the aggregator list */
+ agg_vsi_info->vsi_handle = vsi_handle;
+ list_add(&agg_vsi_info->list_entry, &agg_info->agg_vsi_list);
+ }
+ /* Move VSI node to new aggregator node for requested TC(s) */
+ ice_for_each_traffic_class(tc) {
+ if (!ice_is_tc_ena(*tc_bitmap, tc))
+ continue;
+
+ /* Move VSI to new aggregator */
+ status = ice_sched_move_vsi_to_agg(pi, vsi_handle, agg_id, tc);
+ if (status)
+ break;
+
+ set_bit(tc, agg_vsi_info->tc_bitmap);
+ if (old_agg_vsi_info)
+ clear_bit(tc, old_agg_vsi_info->tc_bitmap);
+ }
+ if (old_agg_vsi_info && !old_agg_vsi_info->tc_bitmap[0]) {
+ list_del(&old_agg_vsi_info->list_entry);
+ devm_kfree(ice_hw_to_dev(pi->hw), old_agg_vsi_info);
+ }
+ return status;
+}
+
+/**
+ * ice_sched_rm_unused_rl_prof - remove unused RL profile
+ * @pi: port information structure
+ *
+ * This function removes unused rate limit profiles from the HW and
+ * SW DB. The caller needs to hold scheduler lock.
+ */
+static void ice_sched_rm_unused_rl_prof(struct ice_port_info *pi)
+{
+ u16 ln;
+
+ for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+ struct ice_aqc_rl_profile_info *rl_prof_elem;
+ struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+ list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+ &pi->rl_prof_list[ln], list_entry) {
+ if (!ice_sched_del_rl_profile(pi->hw, rl_prof_elem))
+ ice_debug(pi->hw, ICE_DBG_SCHED, "Removed rl profile\n");
+ }
+ }
+}
+
+/**
+ * ice_sched_update_elem - update element
+ * @hw: pointer to the HW struct
+ * @node: pointer to node
+ * @info: node info to update
+ *
+ * Update the HW DB and the local SW DB of the node. Update the scheduling
+ * parameters of the node from the info argument's data buffer (info->data)
+ * and return success, or an error on config sched element failure. The
+ * caller needs to hold the scheduler lock.
+ */
+static int
+ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node,
+ struct ice_aqc_txsched_elem_data *info)
+{
+ struct ice_aqc_txsched_elem_data buf;
+ u16 elem_cfgd = 0;
+ u16 num_elems = 1;
+ int status;
+
+ buf = *info;
+ /* Parent TEID is a reserved field in this AQ call */
+ buf.parent_teid = 0;
+ /* Element type is a reserved field in this AQ call */
+ buf.data.elem_type = 0;
+ /* The flags field is reserved in this AQ call */
+ buf.data.flags = 0;
+
+ /* Update HW DB */
+ /* Configure element node */
+ status = ice_aq_cfg_sched_elems(hw, num_elems, &buf, sizeof(buf),
+ &elem_cfgd, NULL);
+ if (status || elem_cfgd != num_elems) {
+ ice_debug(hw, ICE_DBG_SCHED, "Config sched elem error\n");
+ return -EIO;
+ }
+
+ /* Config success case */
+ /* Now update local SW DB */
+ /* Only copy the data portion of info buffer */
+ node->info.data = info->data;
+ return status;
+}
+
+/**
+ * ice_sched_cfg_node_bw_alloc - configure node BW weight/alloc params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @bw_alloc: BW weight/allocation
+ *
+ * This function configures node element's BW allocation.
+ */
+static int
+ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
+ enum ice_rl_type rl_type, u16 bw_alloc)
+{
+ struct ice_aqc_txsched_elem_data buf;
+ struct ice_aqc_txsched_elem *data;
+
+ buf = node->info;
+ data = &buf.data;
+ if (rl_type == ICE_MIN_BW) {
+ data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+ data->cir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+ } else if (rl_type == ICE_MAX_BW) {
+ data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+ data->eir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+ } else {
+ return -EINVAL;
+ }
+
+ /* Configure element */
+ return ice_sched_update_elem(hw, node, &buf);
+}
+
+/**
+ * ice_move_vsi_to_agg - moves VSI to new or default aggregator
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * Move or associate VSI to a new or default aggregator node.
+ */
+int
+ice_move_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+ u8 tc_bitmap)
+{
+ unsigned long bitmap = tc_bitmap;
+ int status;
+
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_assoc_vsi_to_agg(pi, agg_id, vsi_handle,
+ (unsigned long *)&bitmap);
+ if (!status)
+ status = ice_save_agg_vsi_tc_bitmap(pi, agg_id, vsi_handle,
+ (unsigned long *)&bitmap);
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
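+
+/* Usage sketch (editorial illustration): once ice_cfg_agg() has created the
+ * aggregator nodes, a VSI can be attached to them for the same TCs. The
+ * aggregator ID and vsi_handle below are example values owned by the caller.
+ *
+ *	err = ice_move_vsi_to_agg(pi, 100, vsi_handle, BIT(0) | BIT(1));
+ *
+ * On success the VSI's TC 0 and TC 1 nodes hang off aggregator 100 rather
+ * than the default aggregator.
+ */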
+
+/**
+ * ice_set_clear_cir_bw - set or clear CIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear CIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+ if (bw == ICE_SCHED_DFLT_BW) {
+ clear_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+ bw_t_info->cir_bw.bw = 0;
+ } else {
+ /* Save type of BW information */
+ set_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+ bw_t_info->cir_bw.bw = bw;
+ }
+}
+
+/**
+ * ice_set_clear_eir_bw - set or clear EIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear EIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+ if (bw == ICE_SCHED_DFLT_BW) {
+ clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+ bw_t_info->eir_bw.bw = 0;
+ } else {
+ /* EIR BW and Shared BW profiles are mutually exclusive and
+ * hence only one of them may be set for any given element.
+ * First clear earlier saved shared BW information.
+ */
+ clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+ bw_t_info->shared_bw = 0;
+ /* save EIR BW information */
+ set_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+ bw_t_info->eir_bw.bw = bw;
+ }
+}
+
+/**
+ * ice_set_clear_shared_bw - set or clear shared BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear shared bandwidth (BW) in the passed param bw_t_info.
+ */
+static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+ if (bw == ICE_SCHED_DFLT_BW) {
+ clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+ bw_t_info->shared_bw = 0;
+ } else {
+ /* EIR BW and Shared BW profiles are mutually exclusive and
+ * hence only one of them may be set for any given element.
+ * First clear earlier saved EIR BW information.
+ */
+ clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+ bw_t_info->eir_bw.bw = 0;
+ /* save shared BW information */
+ set_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+ bw_t_info->shared_bw = bw;
+ }
+}
+
+/**
+ * ice_sched_save_vsi_bw - save VSI node's BW information
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save BW information of VSI type node for post replay use.
+ */
+static int
+ice_sched_save_vsi_bw(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ enum ice_rl_type rl_type, u32 bw)
+{
+ struct ice_vsi_ctx *vsi_ctx;
+
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ return -EINVAL;
+ vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+ if (!vsi_ctx)
+ return -EINVAL;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ ice_set_clear_cir_bw(&vsi_ctx->sched.bw_t_info[tc], bw);
+ break;
+ case ICE_MAX_BW:
+ ice_set_clear_eir_bw(&vsi_ctx->sched.bw_t_info[tc], bw);
+ break;
+ case ICE_SHARED_BW:
+ ice_set_clear_shared_bw(&vsi_ctx->sched.bw_t_info[tc], bw);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * ice_sched_calc_wakeup - calculate RL profile wakeup parameter
+ * @hw: pointer to the HW struct
+ * @bw: bandwidth in Kbps
+ *
+ * This function calculates the wakeup parameter of RL profile.
+ */
+static u16 ice_sched_calc_wakeup(struct ice_hw *hw, s32 bw)
+{
+ s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f;
+ s32 wakeup_f_int;
+ u16 wakeup = 0;
+
+ /* Get the wakeup integer value */
+ bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+ wakeup_int = div64_long(hw->psm_clk_freq, bytes_per_sec);
+ if (wakeup_int > 63) {
+ wakeup = (u16)((1 << 15) | wakeup_int);
+ } else {
+ /* Calculate fraction value up to 4 decimals
+ * Convert Integer value to a constant multiplier
+ */
+ wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int;
+ wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER *
+ hw->psm_clk_freq, bytes_per_sec);
+
+ /* Get Fraction value */
+ wakeup_f = wakeup_a - wakeup_b;
+
+ /* Round up the Fractional value via Ceil(Fractional value) */
+ if (wakeup_f > div64_long(ICE_RL_PROF_MULTIPLIER, 2))
+ wakeup_f += 1;
+
+ wakeup_f_int = (s32)div64_long(wakeup_f * ICE_RL_PROF_FRACTION,
+ ICE_RL_PROF_MULTIPLIER);
+ wakeup |= (u16)(wakeup_int << 9);
+ wakeup |= (u16)(0x1ff & wakeup_f_int);
+ }
+
+ return wakeup;
+}
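+
+/* Worked example (editorial; assumes a 446 MHz PSM clock,
+ * ICE_PSM_CLK_446MHZ_IN_HZ, and bw = 100000 Kbps, i.e. 100 Mbps):
+ *
+ *	bytes_per_sec = 100000 * 1000 / 8    = 12500000
+ *	wakeup_int    = 446428571 / 12500000 = 35 (<= 63, so use the fraction)
+ *	wakeup_f      = 357142 - 350000      = 7142 (> 5000, adjusted to 7143)
+ *	wakeup_f_int  = 7143 * 512 / 10000   = 365
+ *	wakeup        = (35 << 9) | 365      = 0x476d
+ */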
+
+/**
+ * ice_sched_bw_to_rl_profile - convert BW to profile parameters
+ * @hw: pointer to the HW struct
+ * @bw: bandwidth in Kbps
+ * @profile: profile parameters to return
+ *
+ * This function converts the BW to profile structure format.
+ */
+static int
+ice_sched_bw_to_rl_profile(struct ice_hw *hw, u32 bw,
+ struct ice_aqc_rl_profile_elem *profile)
+{
+ s64 bytes_per_sec, ts_rate, mv_tmp;
+ int status = -EINVAL;
+ bool found = false;
+ s32 encode = 0;
+ s64 mv = 0;
+ s32 i;
+
+ /* BW settings range from 0.5 Mbps to 100 Gbps */
+ if (bw < ICE_SCHED_MIN_BW || bw > ICE_SCHED_MAX_BW)
+ return status;
+
+ /* Bytes per second from Kbps */
+ bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+
+ /* encode is 6 bits, but only 5 of them are really useful */
+ for (i = 0; i < 64; i++) {
+ u64 pow_result = BIT_ULL(i);
+
+ ts_rate = div64_long((s64)hw->psm_clk_freq,
+ pow_result * ICE_RL_PROF_TS_MULTIPLIER);
+ if (ts_rate <= 0)
+ continue;
+
+ /* Multiplier value */
+ mv_tmp = div64_long(bytes_per_sec * ICE_RL_PROF_MULTIPLIER,
+ ts_rate);
+
+ /* Round to the nearest ICE_RL_PROF_MULTIPLIER */
+ mv = round_up_64bit(mv_tmp, ICE_RL_PROF_MULTIPLIER);
+
+ /* Stop at the first multiplier value greater than the
+ * given accuracy bytes
+ */
+ if (mv > ICE_RL_PROF_ACCURACY_BYTES) {
+ encode = i;
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ u16 wm;
+
+ wm = ice_sched_calc_wakeup(hw, bw);
+ profile->rl_multiply = cpu_to_le16(mv);
+ profile->wake_up_calc = cpu_to_le16(wm);
+ profile->rl_encode = cpu_to_le16(encode);
+ status = 0;
+ } else {
+ status = -ENOENT;
+ }
+
+ return status;
+}
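+
+/* Worked example (editorial; same assumptions as the wakeup example above):
+ * for bw = 100000 Kbps at a 446 MHz PSM clock the loop stops at i = 8:
+ *
+ *	ts_rate = 446428571 / (2^8 * 32)   = 54495
+ *	mv_tmp  = 12500000 * 10000 / 54495 = 2293788, which rounds to mv = 229
+ *
+ * 229 exceeds ICE_RL_PROF_ACCURACY_BYTES (128), so the returned profile has
+ * rl_encode = 8 and rl_multiply = 229.
+ */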
+
+/**
+ * ice_sched_add_rl_profile - add RL profile
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: specifies in which layer to create profile
+ *
+ * This function first checks the existing list for corresponding BW
+ * parameter. If it exists, it returns the associated profile; otherwise
+ * it creates a new rate limit profile for the requested BW and adds it to
+ * the HW DB and local list. It returns the new profile or NULL on error.
+ * The caller needs to hold the scheduler lock.
+ */
+static struct ice_aqc_rl_profile_info *
+ice_sched_add_rl_profile(struct ice_port_info *pi,
+ enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+ struct ice_aqc_rl_profile_info *rl_prof_elem;
+ u16 profiles_added = 0, num_profiles = 1;
+ struct ice_aqc_rl_profile_elem *buf;
+ struct ice_hw *hw;
+ u8 profile_type;
+ int status;
+
+ if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+ return NULL;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+ break;
+ case ICE_MAX_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+ break;
+ case ICE_SHARED_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+ break;
+ default:
+ return NULL;
+ }
+
+ if (!pi)
+ return NULL;
+ hw = pi->hw;
+ list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+ list_entry)
+ if ((rl_prof_elem->profile.flags & ICE_AQC_RL_PROFILE_TYPE_M) ==
+ profile_type && rl_prof_elem->bw == bw)
+ /* Return existing profile ID info */
+ return rl_prof_elem;
+
+ /* Create new profile ID */
+ rl_prof_elem = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rl_prof_elem),
+ GFP_KERNEL);
+
+ if (!rl_prof_elem)
+ return NULL;
+
+ status = ice_sched_bw_to_rl_profile(hw, bw, &rl_prof_elem->profile);
+ if (status)
+ goto exit_add_rl_prof;
+
+ rl_prof_elem->bw = bw;
+ /* layer_num is zero relative, and fw expects level from 1 to 9 */
+ rl_prof_elem->profile.level = layer_num + 1;
+ rl_prof_elem->profile.flags = profile_type;
+ rl_prof_elem->profile.max_burst_size = cpu_to_le16(hw->max_burst_size);
+
+ /* Create new entry in HW DB */
+ buf = &rl_prof_elem->profile;
+ status = ice_aq_add_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+ &profiles_added, NULL);
+ if (status || profiles_added != num_profiles)
+ goto exit_add_rl_prof;
+
+ /* Good entry - add in the list */
+ rl_prof_elem->prof_id_ref = 0;
+ list_add(&rl_prof_elem->list_entry, &pi->rl_prof_list[layer_num]);
+ return rl_prof_elem;
+
+exit_add_rl_prof:
+ devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+ return NULL;
+}
+
+/**
+ * ice_sched_cfg_node_bw_lmt - configure node sched params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @rl_prof_id: rate limit profile ID
+ *
+ * This function configures node element's BW limit.
+ */
+static int
+ice_sched_cfg_node_bw_lmt(struct ice_hw *hw, struct ice_sched_node *node,
+ enum ice_rl_type rl_type, u16 rl_prof_id)
+{
+ struct ice_aqc_txsched_elem_data buf;
+ struct ice_aqc_txsched_elem *data;
+
+ buf = node->info;
+ data = &buf.data;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+ data->cir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+ break;
+ case ICE_MAX_BW:
+ /* EIR BW and Shared BW profiles are mutually exclusive and
+ * hence only one of them may be set for any given element
+ */
+ if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+ return -EIO;
+ data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+ data->eir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+ break;
+ case ICE_SHARED_BW:
+ /* Check for removing shared BW */
+ if (rl_prof_id == ICE_SCHED_NO_SHARED_RL_PROF_ID) {
+ /* remove shared profile */
+ data->valid_sections &= ~ICE_AQC_ELEM_VALID_SHARED;
+ data->srl_id = 0; /* clear SRL field */
+
+ /* re-enable EIR with the default profile */
+ data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+ data->eir_bw.bw_profile_idx =
+ cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+ break;
+ }
+ /* EIR BW and Shared BW profiles are mutually exclusive and
+ * hence only one of them may be set for any given element
+ */
+ if ((data->valid_sections & ICE_AQC_ELEM_VALID_EIR) &&
+ (le16_to_cpu(data->eir_bw.bw_profile_idx) !=
+ ICE_SCHED_DFLT_RL_PROF_ID))
+ return -EIO;
+ /* EIR BW is set to default, disable it */
+ data->valid_sections &= ~ICE_AQC_ELEM_VALID_EIR;
+ /* Okay to enable shared BW now */
+ data->valid_sections |= ICE_AQC_ELEM_VALID_SHARED;
+ data->srl_id = cpu_to_le16(rl_prof_id);
+ break;
+ default:
+ /* Unknown rate limit type */
+ return -EINVAL;
+ }
+
+ /* Configure element */
+ return ice_sched_update_elem(hw, node, &buf);
+}
+
+/**
+ * ice_sched_get_node_rl_prof_id - get node's rate limit profile ID
+ * @node: sched node
+ * @rl_type: rate limit type
+ *
+ * If an existing profile matches, this returns the corresponding rate
+ * limit profile ID; otherwise it returns an invalid ID as an error.
+ */
+static u16
+ice_sched_get_node_rl_prof_id(struct ice_sched_node *node,
+ enum ice_rl_type rl_type)
+{
+ u16 rl_prof_id = ICE_SCHED_INVAL_PROF_ID;
+ struct ice_aqc_txsched_elem *data;
+
+ data = &node->info.data;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ if (data->valid_sections & ICE_AQC_ELEM_VALID_CIR)
+ rl_prof_id = le16_to_cpu(data->cir_bw.bw_profile_idx);
+ break;
+ case ICE_MAX_BW:
+ if (data->valid_sections & ICE_AQC_ELEM_VALID_EIR)
+ rl_prof_id = le16_to_cpu(data->eir_bw.bw_profile_idx);
+ break;
+ case ICE_SHARED_BW:
+ if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+ rl_prof_id = le16_to_cpu(data->srl_id);
+ break;
+ default:
+ break;
+ }
+
+ return rl_prof_id;
+}
+
+/**
+ * ice_sched_get_rl_prof_layer - selects rate limit profile creation layer
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @layer_index: layer index
+ *
+ * This function returns the requested profile creation layer.
+ */
+static u8
+ice_sched_get_rl_prof_layer(struct ice_port_info *pi, enum ice_rl_type rl_type,
+ u8 layer_index)
+{
+ struct ice_hw *hw = pi->hw;
+
+ if (layer_index >= hw->num_tx_sched_layers)
+ return ICE_SCHED_INVAL_LAYER_NUM;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ if (hw->layer_info[layer_index].max_cir_rl_profiles)
+ return layer_index;
+ break;
+ case ICE_MAX_BW:
+ if (hw->layer_info[layer_index].max_eir_rl_profiles)
+ return layer_index;
+ break;
+ case ICE_SHARED_BW:
+ /* if current layer doesn't support SRL profile creation
+ * then try a layer up or down.
+ */
+ if (hw->layer_info[layer_index].max_srl_profiles)
+ return layer_index;
+ else if (layer_index < hw->num_tx_sched_layers - 1 &&
+ hw->layer_info[layer_index + 1].max_srl_profiles)
+ return layer_index + 1;
+ else if (layer_index > 0 &&
+ hw->layer_info[layer_index - 1].max_srl_profiles)
+ return layer_index - 1;
+ break;
+ default:
+ break;
+ }
+ return ICE_SCHED_INVAL_LAYER_NUM;
+}
+
+/**
+ * ice_sched_get_srl_node - get shared rate limit node
+ * @node: tree node
+ * @srl_layer: shared rate limit layer
+ *
+ * This function returns the SRL node to be used for shared rate limiting.
+ * The caller needs to hold scheduler lock.
+ */
+static struct ice_sched_node *
+ice_sched_get_srl_node(struct ice_sched_node *node, u8 srl_layer)
+{
+ if (srl_layer > node->tx_sched_layer)
+ return node->children[0];
+ else if (srl_layer < node->tx_sched_layer)
+ /* A node can't be created without a parent. Every node
+ * except the root has a valid parent.
+ */
+ return node->parent;
+ else
+ return node;
+}
+
+/**
+ * ice_sched_rm_rl_profile - remove RL profile ID
+ * @pi: port information structure
+ * @layer_num: layer number where profiles are saved
+ * @profile_type: profile type like EIR, CIR, or SRL
+ * @profile_id: profile ID to remove
+ *
+ * This function removes the rate limit profile from layer 'layer_num' with
+ * type 'profile_type' and profile ID 'profile_id'. The caller needs to hold
+ * scheduler lock.
+ */
+static int
+ice_sched_rm_rl_profile(struct ice_port_info *pi, u8 layer_num, u8 profile_type,
+ u16 profile_id)
+{
+ struct ice_aqc_rl_profile_info *rl_prof_elem;
+ int status = 0;
+
+ if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+ return -EINVAL;
+ /* Check the existing list for RL profile */
+ list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+ list_entry)
+ if ((rl_prof_elem->profile.flags & ICE_AQC_RL_PROFILE_TYPE_M) ==
+ profile_type &&
+ le16_to_cpu(rl_prof_elem->profile.profile_id) ==
+ profile_id) {
+ if (rl_prof_elem->prof_id_ref)
+ rl_prof_elem->prof_id_ref--;
+
+ /* Remove old profile ID from database */
+ status = ice_sched_del_rl_profile(pi->hw, rl_prof_elem);
+ if (status && status != -EBUSY)
+ ice_debug(pi->hw, ICE_DBG_SCHED, "Remove rl profile failed\n");
+ break;
+ }
+ if (status == -EBUSY)
+ status = 0;
+ return status;
+}
+
+/**
+ * ice_sched_set_node_bw_dflt - set node's bandwidth limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ * @layer_num: layer number where RL profiles are saved
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static int
+ice_sched_set_node_bw_dflt(struct ice_port_info *pi,
+ struct ice_sched_node *node,
+ enum ice_rl_type rl_type, u8 layer_num)
+{
+ struct ice_hw *hw;
+ u8 profile_type;
+ u16 rl_prof_id;
+ u16 old_id;
+ int status;
+
+ hw = pi->hw;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+ rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+ break;
+ case ICE_MAX_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+ rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+ break;
+ case ICE_SHARED_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+ /* No SRL is configured for default case */
+ rl_prof_id = ICE_SCHED_NO_SHARED_RL_PROF_ID;
+ break;
+ default:
+ return -EINVAL;
+ }
+ /* Save existing RL prof ID for later clean up */
+ old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+ /* Configure BW scheduling parameters */
+ status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+ if (status)
+ return status;
+
+ /* Remove stale RL profile ID */
+ if (old_id == ICE_SCHED_DFLT_RL_PROF_ID ||
+ old_id == ICE_SCHED_INVAL_PROF_ID)
+ return 0;
+
+ return ice_sched_rm_rl_profile(pi, layer_num, profile_type, old_id);
+}
+
+/**
+ * ice_sched_set_eir_srl_excl - set EIR/SRL exclusiveness
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @layer_num: layer number where rate limit profiles are saved
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth value
+ *
+ * This function sets the node element's bandwidth to SRL or EIR exclusively.
+ * EIR BW and Shared BW profiles are mutually exclusive and hence only one of
+ * them may be set for any given element. This function needs to be called
+ * with the scheduler lock held.
+ */
+static int
+ice_sched_set_eir_srl_excl(struct ice_port_info *pi,
+ struct ice_sched_node *node,
+ u8 layer_num, enum ice_rl_type rl_type, u32 bw)
+{
+ if (rl_type == ICE_SHARED_BW) {
+ /* An SRL node is passed in this case; it may be a different node */
+ if (bw == ICE_SCHED_DFLT_BW)
+ /* SRL being removed, ice_sched_cfg_node_bw_lmt()
+ * enables EIR to default. EIR is not set in this
+ * case, so no additional action is required.
+ */
+ return 0;
+
+ /* SRL being configured, set EIR to default here.
+ * ice_sched_cfg_node_bw_lmt() disables EIR when it
+ * configures SRL
+ */
+ return ice_sched_set_node_bw_dflt(pi, node, ICE_MAX_BW,
+ layer_num);
+ } else if (rl_type == ICE_MAX_BW &&
+ node->info.data.valid_sections & ICE_AQC_ELEM_VALID_SHARED) {
+ /* Remove the shared profile. Setting the default shared
+ * BW removes the shared profile for a node.
+ */
+ return ice_sched_set_node_bw_dflt(pi, node,
+ ICE_SHARED_BW,
+ layer_num);
+ }
+ return 0;
+}
+
+/**
+ * ice_sched_set_node_bw - set node's bandwidth
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: layer number
+ *
+ * This function adds new profile corresponding to requested BW, configures
+ * node's RL profile ID of type CIR, EIR, or SRL, and removes old profile
+ * ID from local database. The caller needs to hold scheduler lock.
+ */
+static int
+ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node,
+ enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+ struct ice_aqc_rl_profile_info *rl_prof_info;
+ struct ice_hw *hw = pi->hw;
+ u16 old_id, rl_prof_id;
+ int status = -EINVAL;
+
+ rl_prof_info = ice_sched_add_rl_profile(pi, rl_type, bw, layer_num);
+ if (!rl_prof_info)
+ return status;
+
+ rl_prof_id = le16_to_cpu(rl_prof_info->profile.profile_id);
+
+ /* Save existing RL prof ID for later clean up */
+ old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+ /* Configure BW scheduling parameters */
+ status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+ if (status)
+ return status;
+
+ /* New changes have been applied */
+ /* Increment the profile ID reference count */
+ rl_prof_info->prof_id_ref++;
+
+ /* Check for old ID removal */
+ if ((old_id == ICE_SCHED_DFLT_RL_PROF_ID && rl_type != ICE_SHARED_BW) ||
+ old_id == ICE_SCHED_INVAL_PROF_ID || old_id == rl_prof_id)
+ return 0;
+
+ return ice_sched_rm_rl_profile(pi, layer_num,
+ rl_prof_info->profile.flags &
+ ICE_AQC_RL_PROFILE_TYPE_M, old_id);
+}
+
+/**
+ * ice_sched_set_node_bw_lmt - set node's BW limit
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * It updates the node's BW limit parameters, such as the BW RL profile ID of
+ * type CIR, EIR, or SRL. The caller needs to hold the scheduler lock.
+ */
+static int
+ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node,
+ enum ice_rl_type rl_type, u32 bw)
+{
+ struct ice_sched_node *cfg_node = node;
+ struct ice_hw *hw;
+ u8 layer_num;
+ int status;
+
+ if (!pi)
+ return -EINVAL;
+ hw = pi->hw;
+ /* Remove unused RL profile IDs from HW and SW DB */
+ ice_sched_rm_unused_rl_prof(pi);
+ layer_num = ice_sched_get_rl_prof_layer(pi, rl_type,
+ node->tx_sched_layer);
+ if (layer_num >= hw->num_tx_sched_layers)
+ return -EINVAL;
+
+ if (rl_type == ICE_SHARED_BW) {
+ /* SRL node may be different */
+ cfg_node = ice_sched_get_srl_node(node, layer_num);
+ if (!cfg_node)
+ return -EIO;
+ }
+ /* EIR BW and Shared BW profiles are mutually exclusive and
+ * hence only one of them may be set for any given element
+ */
+ status = ice_sched_set_eir_srl_excl(pi, cfg_node, layer_num, rl_type,
+ bw);
+ if (status)
+ return status;
+ if (bw == ICE_SCHED_DFLT_BW)
+ return ice_sched_set_node_bw_dflt(pi, cfg_node, rl_type,
+ layer_num);
+ return ice_sched_set_node_bw(pi, cfg_node, rl_type, bw, layer_num);
+}
+
+/**
+ * ice_sched_set_node_bw_dflt_lmt - set node's BW limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static int
+ice_sched_set_node_bw_dflt_lmt(struct ice_port_info *pi,
+ struct ice_sched_node *node,
+ enum ice_rl_type rl_type)
+{
+ return ice_sched_set_node_bw_lmt(pi, node, rl_type,
+ ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_sched_validate_srl_node - Check node for SRL applicability
+ * @node: sched node to configure
+ * @sel_layer: selected SRL layer
+ *
+ * This function checks if the SRL can be applied to a selected layer node on
+ * behalf of the requested node (first argument). This function needs to be
+ * called with scheduler lock held.
+ */
+static int
+ice_sched_validate_srl_node(struct ice_sched_node *node, u8 sel_layer)
+{
+ /* SRL profiles are not available on all layers. Check if the
+ * SRL profile can be applied to a node above or below the
+ * requested node. SRL configuration is possible only if the
+ * selected layer's node has a single child.
+ */
+ if (sel_layer == node->tx_sched_layer ||
+ ((sel_layer == node->tx_sched_layer + 1) &&
+ node->num_children == 1) ||
+ ((sel_layer == node->tx_sched_layer - 1) &&
+ (node->parent && node->parent->num_children == 1)))
+ return 0;
+
+ return -EIO;
+}
+
+/**
+ * ice_sched_save_q_bw - save queue node's BW information
+ * @q_ctx: queue context structure
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save BW information of queue type node for post replay use.
+ */
+static int
+ice_sched_save_q_bw(struct ice_q_ctx *q_ctx, enum ice_rl_type rl_type, u32 bw)
+{
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ ice_set_clear_cir_bw(&q_ctx->bw_t_info, bw);
+ break;
+ case ICE_MAX_BW:
+ ice_set_clear_eir_bw(&q_ctx->bw_t_info, bw);
+ break;
+ case ICE_SHARED_BW:
+ ice_set_clear_shared_bw(&q_ctx->bw_t_info, bw);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * ice_sched_set_q_bw_lmt - sets queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function sets BW limit of queue scheduling node.
+ */
+static int
+ice_sched_set_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+ struct ice_sched_node *node;
+ struct ice_q_ctx *q_ctx;
+ int status = -EINVAL;
+
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ return -EINVAL;
+ mutex_lock(&pi->sched_lock);
+ q_ctx = ice_get_lan_q_ctx(pi->hw, vsi_handle, tc, q_handle);
+ if (!q_ctx)
+ goto exit_q_bw_lmt;
+ node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+ if (!node) {
+ ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong q_teid\n");
+ goto exit_q_bw_lmt;
+ }
+
+ /* Return error if it is not a leaf node */
+ if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF)
+ goto exit_q_bw_lmt;
+
+ /* SRL bandwidth layer selection */
+ if (rl_type == ICE_SHARED_BW) {
+ u8 sel_layer; /* selected layer */
+
+ sel_layer = ice_sched_get_rl_prof_layer(pi, rl_type,
+ node->tx_sched_layer);
+ if (sel_layer >= pi->hw->num_tx_sched_layers) {
+ status = -EINVAL;
+ goto exit_q_bw_lmt;
+ }
+ status = ice_sched_validate_srl_node(node, sel_layer);
+ if (status)
+ goto exit_q_bw_lmt;
+ }
+
+ if (bw == ICE_SCHED_DFLT_BW)
+ status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type);
+ else
+ status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw);
+
+ if (!status)
+ status = ice_sched_save_q_bw(q_ctx, rl_type, bw);
+
+exit_q_bw_lmt:
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_cfg_q_bw_lmt - configure queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function configures BW limit of queue scheduling node.
+ */
+int
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+ return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+ bw);
+}
+
+/**
+ * ice_cfg_q_bw_dflt_lmt - configure queue BW default limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ *
+ * This function configures BW default limit of queue scheduling node.
+ */
+int
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 q_handle, enum ice_rl_type rl_type)
+{
+ return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+ ICE_SCHED_DFLT_BW);
+}
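+
+/* Usage sketch (editorial illustration): cap a LAN queue at 50 Mbps on TC 0
+ * and later restore the default (unlimited) profile. vsi_handle and q_handle
+ * are example values owned by the caller; BW is in Kbps.
+ *
+ *	err = ice_cfg_q_bw_lmt(pi, vsi_handle, 0, q_handle, ICE_MAX_BW, 50000);
+ *	...
+ *	err = ice_cfg_q_bw_dflt_lmt(pi, vsi_handle, 0, q_handle, ICE_MAX_BW);
+ */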
+
+/**
+ * ice_sched_get_node_by_id_type - get node from ID type
+ * @pi: port information structure
+ * @id: identifier
+ * @agg_type: type of aggregator
+ * @tc: traffic class
+ *
+ * This function returns the node identified by the given ID and aggregator
+ * type for the given traffic class (TC). This function needs to be called with
+ * the scheduler lock held.
+ */
+static struct ice_sched_node *
+ice_sched_get_node_by_id_type(struct ice_port_info *pi, u32 id,
+ enum ice_agg_type agg_type, u8 tc)
+{
+ struct ice_sched_node *node = NULL;
+
+ switch (agg_type) {
+ case ICE_AGG_TYPE_VSI: {
+ struct ice_vsi_ctx *vsi_ctx;
+ u16 vsi_handle = (u16)id;
+
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ break;
+ /* Get sched_vsi_info */
+ vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+ if (!vsi_ctx)
+ break;
+ node = vsi_ctx->sched.vsi_node[tc];
+ break;
+ }
+
+ case ICE_AGG_TYPE_AGG: {
+ struct ice_sched_node *tc_node;
+
+ tc_node = ice_sched_get_tc_node(pi, tc);
+ if (tc_node)
+ node = ice_sched_get_agg_node(pi, tc_node, id);
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return node;
+}
+
+/**
+ * ice_sched_set_node_bw_lmt_per_tc - set node BW limit per TC
+ * @pi: port information structure
+ * @id: ID (software VSI handle or AGG ID)
+ * @agg_type: aggregator type (VSI or AGG type node)
+ * @tc: traffic class
+ * @rl_type: min or max
+ * @bw: bandwidth in Kbps
+ *
+ * This function sets the BW limit of a VSI or aggregator scheduling node
+ * for the given TC, using the passed in BW argument.
+ */
+int
+ice_sched_set_node_bw_lmt_per_tc(struct ice_port_info *pi, u32 id,
+ enum ice_agg_type agg_type, u8 tc,
+ enum ice_rl_type rl_type, u32 bw)
+{
+ struct ice_sched_node *node;
+ int status = -EINVAL;
+
+ if (!pi)
+ return status;
+
+ if (rl_type == ICE_UNKNOWN_BW)
+ return status;
+
+ mutex_lock(&pi->sched_lock);
+ node = ice_sched_get_node_by_id_type(pi, id, agg_type, tc);
+ if (!node) {
+ ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong id, agg type, or tc\n");
+ goto exit_set_node_bw_lmt_per_tc;
+ }
+ if (bw == ICE_SCHED_DFLT_BW)
+ status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type);
+ else
+ status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw);
+
+exit_set_node_bw_lmt_per_tc:
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_cfg_vsi_bw_lmt_per_tc - configure VSI BW limit per TC
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: traffic class
+ * @rl_type: min or max
+ * @bw: bandwidth in Kbps
+ *
+ * This function configures BW limit of VSI scheduling node based on TC
+ * information.
+ */
+int
+ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ enum ice_rl_type rl_type, u32 bw)
+{
+ int status;
+
+ status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle,
+ ICE_AGG_TYPE_VSI,
+ tc, rl_type, bw);
+ if (!status) {
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type, bw);
+ mutex_unlock(&pi->sched_lock);
+ }
+ return status;
+}
+
+/**
+ * ice_cfg_vsi_bw_dflt_lmt_per_tc - configure default VSI BW limit per TC
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: traffic class
+ * @rl_type: min or max
+ *
+ * This function configures default BW limit of VSI scheduling node based on TC
+ * information.
+ */
+int
+ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ enum ice_rl_type rl_type)
+{
+ int status;
+
+ status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle,
+ ICE_AGG_TYPE_VSI,
+ tc, rl_type,
+ ICE_SCHED_DFLT_BW);
+ if (!status) {
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type,
+ ICE_SCHED_DFLT_BW);
+ mutex_unlock(&pi->sched_lock);
+ }
+ return status;
+}
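+
+/* Usage sketch (editorial illustration): bound a VSI's transmit rate on TC 0
+ * at 1 Gbps, then return to the default profile. vsi_handle is an example
+ * value owned by the caller.
+ *
+ *	err = ice_cfg_vsi_bw_lmt_per_tc(pi, vsi_handle, 0, ICE_MAX_BW, 1000000);
+ *	...
+ *	err = ice_cfg_vsi_bw_dflt_lmt_per_tc(pi, vsi_handle, 0, ICE_MAX_BW);
+ */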
+
+/**
+ * ice_cfg_rl_burst_size - Set burst size value
+ * @hw: pointer to the HW struct
+ * @bytes: burst size in bytes
+ *
+ * This function sets the burst size to the requested new value. The new
+ * burst size value is used for future rate limit calls. It doesn't change the
+ * existing or previously created RL profiles.
+ */
+int ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes)
+{
+ u16 burst_size_to_prog;
+
+ if (bytes < ICE_MIN_BURST_SIZE_ALLOWED ||
+ bytes > ICE_MAX_BURST_SIZE_ALLOWED)
+ return -EINVAL;
+ if (ice_round_to_num(bytes, 64) <=
+ ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY) {
+ /* 64 byte granularity case */
+ /* Disable MSB granularity bit */
+ burst_size_to_prog = ICE_64_BYTE_GRANULARITY;
+ /* round number to nearest 64 byte granularity */
+ bytes = ice_round_to_num(bytes, 64);
+ /* The value is in 64 byte chunks */
+ burst_size_to_prog |= (u16)(bytes / 64);
+ } else {
+ /* k bytes granularity case */
+ /* Enable MSB granularity bit */
+ burst_size_to_prog = ICE_KBYTE_GRANULARITY;
+ /* round number to nearest 1024 granularity */
+ bytes = ice_round_to_num(bytes, 1024);
+ /* check rounding doesn't go beyond allowed */
+ if (bytes > ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY)
+ bytes = ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY;
+ /* The value is in k bytes */
+ burst_size_to_prog |= (u16)(bytes / 1024);
+ }
+ hw->max_burst_size = burst_size_to_prog;
+ return 0;
+}
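+
+/* Worked examples (editorial): a request of 4096 bytes fits the 64 byte
+ * granularity range (4096 <= 2047 * 64), so the programmed value is
+ * 4096 / 64 = 64 with the granularity MSB clear. A request of 262144 bytes
+ * (256 KB) exceeds that range, so the KB granularity bit is set and the
+ * value is 262144 / 1024 = 256, i.e. 0x900 once BIT(11) is OR'ed in.
+ */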
+
+/**
+ * ice_sched_replay_node_prio - re-configure node priority
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @priority: priority value
+ *
+ * This function configures node element's priority value. It
+ * needs to be called with scheduler lock held.
+ */
+static int
+ice_sched_replay_node_prio(struct ice_hw *hw, struct ice_sched_node *node,
+ u8 priority)
+{
+ struct ice_aqc_txsched_elem_data buf;
+ struct ice_aqc_txsched_elem *data;
+ int status;
+
+ buf = node->info;
+ data = &buf.data;
+ data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC;
+ data->generic = priority;
+
+ /* Configure element */
+ status = ice_sched_update_elem(hw, node, &buf);
+ return status;
+}
+
+/**
+ * ice_sched_replay_node_bw - replay node(s) BW
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @bw_t_info: BW type information
+ *
+ * This function restores node's BW from bw_t_info. The caller needs
+ * to hold the scheduler lock.
+ */
+static int
+ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node,
+ struct ice_bw_type_info *bw_t_info)
+{
+ struct ice_port_info *pi = hw->port_info;
+ int status = -EINVAL;
+ u16 bw_alloc;
+
+ if (!node)
+ return status;
+ if (bitmap_empty(bw_t_info->bw_t_bitmap, ICE_BW_TYPE_CNT))
+ return 0;
+ if (test_bit(ICE_BW_TYPE_PRIO, bw_t_info->bw_t_bitmap)) {
+ status = ice_sched_replay_node_prio(hw, node,
+ bw_t_info->generic);
+ if (status)
+ return status;
+ }
+ if (test_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap)) {
+ status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW,
+ bw_t_info->cir_bw.bw);
+ if (status)
+ return status;
+ }
+ if (test_bit(ICE_BW_TYPE_CIR_WT, bw_t_info->bw_t_bitmap)) {
+ bw_alloc = bw_t_info->cir_bw.bw_alloc;
+ status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MIN_BW,
+ bw_alloc);
+ if (status)
+ return status;
+ }
+ if (test_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap)) {
+ status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW,
+ bw_t_info->eir_bw.bw);
+ if (status)
+ return status;
+ }
+ if (test_bit(ICE_BW_TYPE_EIR_WT, bw_t_info->bw_t_bitmap)) {
+ bw_alloc = bw_t_info->eir_bw.bw_alloc;
+ status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MAX_BW,
+ bw_alloc);
+ if (status)
+ return status;
+ }
+ if (test_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap))
+ status = ice_sched_set_node_bw_lmt(pi, node, ICE_SHARED_BW,
+ bw_t_info->shared_bw);
+ return status;
+}
+
+/**
+ * ice_sched_get_ena_tc_bitmap - get enabled TC bitmap
+ * @pi: port info struct
+ * @tc_bitmap: 8 bits TC bitmap to check
+ * @ena_tc_bitmap: 8 bits enabled TC bitmap to return
+ *
+ * This function returns the enabled TC bitmap in ena_tc_bitmap. Some TCs may
+ * be missing after a reset, so only the TCs that are both requested and
+ * currently enabled are returned. This function needs to be called with the
+ * scheduler lock held.
+ */
+static void
+ice_sched_get_ena_tc_bitmap(struct ice_port_info *pi,
+ unsigned long *tc_bitmap,
+ unsigned long *ena_tc_bitmap)
+{
+ u8 tc;
+
+ /* Some TC(s) may be missing after reset, adjust for replay */
+ ice_for_each_traffic_class(tc)
+ if (ice_is_tc_ena(*tc_bitmap, tc) &&
+ (ice_sched_get_tc_node(pi, tc)))
+ set_bit(tc, ena_tc_bitmap);
+}
+
+/**
+ * ice_sched_replay_agg - recreate aggregator node(s)
+ * @hw: pointer to the HW struct
+ *
+ * This function recreates aggregator type nodes which were not replayed
+ * earlier. It also replays aggregator BW information. These aggregator nodes
+ * are not associated with VSI type nodes yet.
+ */
+void ice_sched_replay_agg(struct ice_hw *hw)
+{
+ struct ice_port_info *pi = hw->port_info;
+ struct ice_sched_agg_info *agg_info;
+
+ mutex_lock(&pi->sched_lock);
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry)
+ /* replay aggregator (re-create aggregator node) */
+ if (!bitmap_equal(agg_info->tc_bitmap, agg_info->replay_tc_bitmap,
+ ICE_MAX_TRAFFIC_CLASS)) {
+ DECLARE_BITMAP(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ int status;
+
+ bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ ice_sched_get_ena_tc_bitmap(pi,
+ agg_info->replay_tc_bitmap,
+ replay_bitmap);
+ status = ice_sched_cfg_agg(hw->port_info,
+ agg_info->agg_id,
+ ICE_AGG_TYPE_AGG,
+ replay_bitmap);
+ if (status) {
+ dev_info(ice_hw_to_dev(hw),
+ "Replay agg id[%d] failed\n",
+ agg_info->agg_id);
+ /* Move on to next one */
+ continue;
+ }
+ }
+ mutex_unlock(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_replay_agg_vsi_preinit - Agg/VSI replay pre initialization
+ * @hw: pointer to the HW struct
+ *
+ * This function initializes the aggregator(s) TC bitmap to zero, a required
+ * preinit step for replaying aggregators.
+ */
+void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw)
+{
+ struct ice_port_info *pi = hw->port_info;
+ struct ice_sched_agg_info *agg_info;
+
+ mutex_lock(&pi->sched_lock);
+ list_for_each_entry(agg_info, &hw->agg_list, list_entry) {
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+ agg_info->tc_bitmap[0] = 0;
+ list_for_each_entry(agg_vsi_info, &agg_info->agg_vsi_list,
+ list_entry)
+ agg_vsi_info->tc_bitmap[0] = 0;
+ }
+ mutex_unlock(&pi->sched_lock);
+}
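+
+/* Replay-order sketch (editorial; the actual call sites live in the reset
+ * rebuild/replay path): the TC bitmaps are zeroed first so that replay can
+ * rebuild them from the saved replay bitmaps, roughly:
+ *
+ *	ice_sched_replay_agg_vsi_preinit(hw);
+ *	ice_replay_vsi_agg(hw, vsi_handle);  (for each replayed VSI)
+ *	ice_sched_replay_agg(hw);            (re-create remaining aggregators)
+ */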
+
+/**
+ * ice_sched_replay_vsi_agg - replay aggregator & VSI to aggregator node(s)
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ *
+ * This function replays aggregator node, VSI to aggregator type nodes, and
+ * their node bandwidth information. This function needs to be called with
+ * scheduler lock held.
+ */
+static int ice_sched_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle)
+{
+ DECLARE_BITMAP(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ struct ice_sched_agg_vsi_info *agg_vsi_info;
+ struct ice_port_info *pi = hw->port_info;
+ struct ice_sched_agg_info *agg_info;
+ int status;
+
+ bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+ agg_info = ice_get_vsi_agg_info(hw, vsi_handle);
+ if (!agg_info)
+ return 0; /* Not present in list - default Agg case */
+ agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+ if (!agg_vsi_info)
+ return 0; /* Not present in list - default Agg case */
+ ice_sched_get_ena_tc_bitmap(pi, agg_info->replay_tc_bitmap,
+ replay_bitmap);
+ /* Replay aggregator node associated to vsi_handle */
+ status = ice_sched_cfg_agg(hw->port_info, agg_info->agg_id,
+ ICE_AGG_TYPE_AGG, replay_bitmap);
+ if (status)
+ return status;
+
+ bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ ice_sched_get_ena_tc_bitmap(pi, agg_vsi_info->replay_tc_bitmap,
+ replay_bitmap);
+ /* Move this VSI (vsi_handle) to above aggregator */
+ return ice_sched_assoc_vsi_to_agg(pi, agg_info->agg_id, vsi_handle,
+ replay_bitmap);
+}
+
+/**
+ * ice_replay_vsi_agg - replay VSI to aggregator node
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ *
+ * This function replays association of VSI to aggregator type nodes, and
+ * node bandwidth information.
+ */
+int ice_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_port_info *pi = hw->port_info;
+ int status;
+
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_replay_vsi_agg(hw, vsi_handle);
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_sched_replay_q_bw - replay queue type node BW
+ * @pi: port information structure
+ * @q_ctx: queue context structure
+ *
+ * This function replays queue type node bandwidth. This function needs to be
+ * called with scheduler lock held.
+ */
+int ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx)
+{
+ struct ice_sched_node *q_node;
+
+ /* The following also checks that the node is present in the tree */
+ q_node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+ if (!q_node)
+ return -EINVAL;
+ return ice_sched_replay_node_bw(pi->hw, q_node, &q_ctx->bw_t_info);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
new file mode 100644
index 000000000..4f91577fe
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SCHED_H_
+#define _ICE_SCHED_H_
+
+#include "ice_common.h"
+
+#define ICE_QGRP_LAYER_OFFSET 2
+#define ICE_VSI_LAYER_OFFSET 4
+#define ICE_AGG_LAYER_OFFSET 6
+#define ICE_SCHED_INVAL_LAYER_NUM 0xFF
+/* Burst size is a 12-bit field that is configured while creating the RL
+ * profile(s). The MSB is a granularity bit that selects the granularity type:
+ * 0 - LSB bits are in 64 byte granularity
+ * 1 - LSB bits are in 1K byte granularity
+ */
+#define ICE_64_BYTE_GRANULARITY 0
+#define ICE_KBYTE_GRANULARITY BIT(11)
+#define ICE_MIN_BURST_SIZE_ALLOWED 64 /* In Bytes */
+#define ICE_MAX_BURST_SIZE_ALLOWED \
+ ((BIT(11) - 1) * 1024) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY \
+ ((BIT(11) - 1) * 64) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY ICE_MAX_BURST_SIZE_ALLOWED
+
+#define ICE_RL_PROF_ACCURACY_BYTES 128
+#define ICE_RL_PROF_MULTIPLIER 10000
+#define ICE_RL_PROF_TS_MULTIPLIER 32
+#define ICE_RL_PROF_FRACTION 512
+
+#define ICE_PSM_CLK_367MHZ_IN_HZ 367647059
+#define ICE_PSM_CLK_416MHZ_IN_HZ 416666667
+#define ICE_PSM_CLK_446MHZ_IN_HZ 446428571
+#define ICE_PSM_CLK_390MHZ_IN_HZ 390625000
+
+/* BW rate limit profile parameters list entry along
+ * with bandwidth maintained per layer in port info
+ */
+struct ice_aqc_rl_profile_info {
+ struct ice_aqc_rl_profile_elem profile;
+ struct list_head list_entry;
+ u32 bw; /* requested */
+ u16 prof_id_ref; /* profile ID to node association ref count */
+};
+
+struct ice_sched_agg_vsi_info {
+ struct list_head list_entry;
+ DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ u16 vsi_handle;
+ /* save aggregator VSI TC bitmap */
+ DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+};
+
+struct ice_sched_agg_info {
+ struct list_head agg_vsi_list;
+ struct list_head list_entry;
+ DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+ u32 agg_id;
+ enum ice_agg_type agg_type;
+ /* bw_t_info saves aggregator BW information */
+ struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS];
+ /* save aggregator TC bitmap */
+ DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+};
+
+/* FW AQ command calls */
+int
+ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
+ struct ice_aqc_txsched_elem_data *buf, u16 buf_size,
+ u16 *elems_ret, struct ice_sq_cd *cd);
+int ice_sched_init_port(struct ice_port_info *pi);
+int ice_sched_query_res_alloc(struct ice_hw *hw);
+void ice_sched_get_psm_clk_freq(struct ice_hw *hw);
+
+void ice_sched_clear_port(struct ice_port_info *pi);
+void ice_sched_cleanup_all(struct ice_hw *hw);
+void ice_sched_clear_agg(struct ice_hw *hw);
+
+struct ice_sched_node *
+ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid);
+int
+ice_sched_add_node(struct ice_port_info *pi, u8 layer,
+ struct ice_aqc_txsched_elem_data *info);
+void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node);
+struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc);
+struct ice_sched_node *
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u8 owner);
+int
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
+ u8 owner, bool enable);
+int ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
+int ice_rm_vsi_rdma_cfg(struct ice_port_info *pi, u16 vsi_handle);
+
+/* Tx scheduler rate limiter functions */
+int
+ice_cfg_agg(struct ice_port_info *pi, u32 agg_id,
+ enum ice_agg_type agg_type, u8 tc_bitmap);
+int
+ice_move_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+ u8 tc_bitmap);
+int
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 q_handle, enum ice_rl_type rl_type, u32 bw);
+int
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 q_handle, enum ice_rl_type rl_type);
+int
+ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ enum ice_rl_type rl_type, u32 bw);
+int
+ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ enum ice_rl_type rl_type);
+int
+ice_sched_set_node_bw_lmt_per_tc(struct ice_port_info *pi, u32 id,
+ enum ice_agg_type agg_type, u8 tc,
+ enum ice_rl_type rl_type, u32 bw);
+int ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes);
+void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw);
+void ice_sched_replay_agg(struct ice_hw *hw);
+int ice_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle);
+int ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx);
+#endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
new file mode 100644
index 000000000..b719e9a77
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -0,0 +1,1911 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_vf_lib_private.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_dcb_lib.h"
+#include "ice_flow.h"
+#include "ice_eswitch.h"
+#include "ice_virtchnl_allowlist.h"
+#include "ice_flex_pipe.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_vlan.h"
+
+/**
+ * ice_free_vf_entries - Free all VF entries from the hash table
+ * @pf: pointer to the PF structure
+ *
+ * Iterate over the VF hash table, removing and releasing all VF entries.
+ * Called during VF teardown or as cleanup during failed VF initialization.
+ */
+static void ice_free_vf_entries(struct ice_pf *pf)
+{
+ struct ice_vfs *vfs = &pf->vfs;
+ struct hlist_node *tmp;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ /* Remove all VFs from the hash table and release their main
+ * reference. Once all references to the VF are dropped, ice_put_vf()
+ * will call ice_release_vf which will remove the VF memory.
+ */
+ lockdep_assert_held(&vfs->table_lock);
+
+ hash_for_each_safe(vfs->table, bkt, tmp, vf, entry) {
+ hash_del_rcu(&vf->entry);
+ ice_put_vf(vf);
+ }
+}
+
+/**
+ * ice_vf_vsi_release - invalidate the VF's VSI after freeing it
+ * @vf: invalidate this VF's VSI after freeing it
+ */
+static void ice_vf_vsi_release(struct ice_vf *vf)
+{
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+ if (WARN_ON(!vsi))
+ return;
+
+ ice_vsi_release(vsi);
+ ice_vf_invalidate_vsi(vf);
+}
+
+/**
+ * ice_free_vf_res - Free a VF's resources
+ * @vf: pointer to the VF info
+ */
+static void ice_free_vf_res(struct ice_vf *vf)
+{
+ struct ice_pf *pf = vf->pf;
+ int i, last_vector_idx;
+
+ /* First, disable the VF's configuration API to prevent the OS from
+ * accessing the VF's VSI after it's freed or invalidated.
+ */
+ clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+ ice_vf_fdir_exit(vf);
+ /* free VF control VSI */
+ if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+ ice_vf_ctrl_vsi_release(vf);
+
+ /* free VSI and disconnect it from the parent uplink */
+ if (vf->lan_vsi_idx != ICE_NO_VSI) {
+ ice_vf_vsi_release(vf);
+ vf->num_mac = 0;
+ }
+
+ last_vector_idx = vf->first_vector_idx + pf->vfs.num_msix_per - 1;
+
+ /* clear VF MDD event information */
+ memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events));
+ memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events));
+
+ /* Disable interrupts so that VF starts in a known state */
+ for (i = vf->first_vector_idx; i <= last_vector_idx; i++) {
+ wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M);
+ ice_flush(&pf->hw);
+ }
+ /* reset some of the state variables keeping track of the resources */
+ clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+ clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+}
+
+/**
+ * ice_dis_vf_mappings
+ * @vf: pointer to the VF structure
+ */
+static void ice_dis_vf_mappings(struct ice_vf *vf)
+{
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vsi;
+ struct device *dev;
+ int first, last, v;
+ struct ice_hw *hw;
+
+ hw = &pf->hw;
+ vsi = ice_get_vf_vsi(vf);
+ if (WARN_ON(!vsi))
+ return;
+
+ dev = ice_pf_to_dev(pf);
+ wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
+ wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
+
+ first = vf->first_vector_idx;
+ last = first + pf->vfs.num_msix_per - 1;
+ for (v = first; v <= last; v++) {
+ u32 reg;
+
+ reg = (((1 << GLINT_VECT2FUNC_IS_PF_S) &
+ GLINT_VECT2FUNC_IS_PF_M) |
+ ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
+ GLINT_VECT2FUNC_PF_NUM_M));
+ wr32(hw, GLINT_VECT2FUNC(v), reg);
+ }
+
+ if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG)
+ wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0);
+ else
+ dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
+
+ if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG)
+ wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0);
+ else
+ dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
+}
+
+/**
+ * ice_sriov_free_msix_res - Reset/free any used MSIX resources
+ * @pf: pointer to the PF structure
+ *
+ * Since no MSIX entries are taken from the pf->irq_tracker, just clear
+ * pf->sriov_base_vector.
+ *
+ * Returns 0 on success, and -EINVAL on error.
+ */
+static int ice_sriov_free_msix_res(struct ice_pf *pf)
+{
+ struct ice_res_tracker *res;
+
+ if (!pf)
+ return -EINVAL;
+
+ res = pf->irq_tracker;
+ if (!res)
+ return -EINVAL;
+
+ /* give back irq_tracker resources used */
+ WARN_ON(pf->sriov_base_vector < res->num_entries);
+
+ pf->sriov_base_vector = 0;
+
+ return 0;
+}
+
+/**
+ * ice_free_vfs - Free all VFs
+ * @pf: pointer to the PF structure
+ */
+void ice_free_vfs(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_vfs *vfs = &pf->vfs;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ if (!ice_has_vfs(pf))
+ return;
+
+ while (test_and_set_bit(ICE_VF_DIS, pf->state))
+ usleep_range(1000, 2000);
+
+ /* Disable IOV before freeing resources. This lets any VF drivers
+ * running in the host get themselves cleaned up before we yank
+ * the carpet out from underneath their feet.
+ */
+ if (!pci_vfs_assigned(pf->pdev))
+ pci_disable_sriov(pf->pdev);
+ else
+ dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
+
+ mutex_lock(&vfs->table_lock);
+
+ ice_eswitch_release(pf);
+
+ ice_for_each_vf(pf, bkt, vf) {
+ mutex_lock(&vf->cfg_lock);
+
+ ice_dis_vf_qs(vf);
+
+ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+ /* disable VF qp mappings and set VF disable state */
+ ice_dis_vf_mappings(vf);
+ set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+ ice_free_vf_res(vf);
+ }
+
+ if (!pci_vfs_assigned(pf->pdev)) {
+ u32 reg_idx, bit_idx;
+
+ reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32;
+ bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32;
+ wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+ }
+
+ /* clear malicious info since the VF is getting released */
+ if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs,
+ ICE_MAX_SRIOV_VFS, vf->vf_id))
+ dev_dbg(dev, "failed to clear malicious VF state for VF %u\n",
+ vf->vf_id);
+
+ mutex_unlock(&vf->cfg_lock);
+ }
+
+ if (ice_sriov_free_msix_res(pf))
+ dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
+
+ vfs->num_qps_per = 0;
+ ice_free_vf_entries(pf);
+
+ mutex_unlock(&vfs->table_lock);
+
+ clear_bit(ICE_VF_DIS, pf->state);
+ clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
+}
+
+/**
+ * ice_vf_vsi_setup - Set up a VF VSI
+ * @vf: VF to setup VSI for
+ *
+ * Returns pointer to the successfully allocated VSI struct on success,
+ * otherwise returns NULL on failure.
+ */
+static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf)
+{
+ struct ice_port_info *pi = ice_vf_get_port_info(vf);
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vsi;
+
+ vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf, NULL);
+
+ if (!vsi) {
+ dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n");
+ ice_vf_invalidate_vsi(vf);
+ return NULL;
+ }
+
+ vf->lan_vsi_idx = vsi->idx;
+ vf->lan_vsi_num = vsi->vsi_num;
+
+ return vsi;
+}
+
+/**
+ * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space
+ * @pf: pointer to PF structure
+ * @vf: pointer to VF that the first MSIX vector index is being calculated for
+ *
+ * This returns the first MSIX vector index in PF space that is used by this VF.
+ * This index is used when accessing PF relative registers such as
+ * GLINT_VECT2FUNC and GLINT_DYN_CTL.
+ * This will always be the OICR index in the AVF driver so any functionality
+ * using vf->first_vector_idx for queue configuration will have to increment by
+ * 1 to avoid meddling with the OICR index.
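+ *
+ * For example (hypothetical values): with sriov_base_vector = 100 and
+ * num_msix_per = 17, VF 2 gets first_vector_idx = 100 + 2 * 17 = 134.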
+ */
+static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
+{
+ return pf->sriov_base_vector + vf->vf_id * pf->vfs.num_msix_per;
+}
+
+/**
+ * ice_ena_vf_msix_mappings - enable VF MSIX mappings in hardware
+ * @vf: VF to enable MSIX mappings for
+ *
+ * Some registers must be indexed/configured using device-global hardware
+ * values, while others take 0-based values relative to the PF.
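+ *
+ * For example (hypothetical values): if msix_vector_first_id is 1 and the
+ * VF's PF-based first vector is 133, its device-based first vector is 134.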
+ */
+static void ice_ena_vf_msix_mappings(struct ice_vf *vf)
+{
+ int device_based_first_msix, device_based_last_msix;
+ int pf_based_first_msix, pf_based_last_msix, v;
+ struct ice_pf *pf = vf->pf;
+ int device_based_vf_id;
+ struct ice_hw *hw;
+ u32 reg;
+
+ hw = &pf->hw;
+ pf_based_first_msix = vf->first_vector_idx;
+ pf_based_last_msix = (pf_based_first_msix + pf->vfs.num_msix_per) - 1;
+
+ device_based_first_msix = pf_based_first_msix +
+ pf->hw.func_caps.common_cap.msix_vector_first_id;
+ device_based_last_msix =
+ (device_based_first_msix + pf->vfs.num_msix_per) - 1;
+ device_based_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+ reg = (((device_based_first_msix << VPINT_ALLOC_FIRST_S) &
+ VPINT_ALLOC_FIRST_M) |
+ ((device_based_last_msix << VPINT_ALLOC_LAST_S) &
+ VPINT_ALLOC_LAST_M) | VPINT_ALLOC_VALID_M);
+ wr32(hw, VPINT_ALLOC(vf->vf_id), reg);
+
+ reg = (((device_based_first_msix << VPINT_ALLOC_PCI_FIRST_S)
+ & VPINT_ALLOC_PCI_FIRST_M) |
+ ((device_based_last_msix << VPINT_ALLOC_PCI_LAST_S) &
+ VPINT_ALLOC_PCI_LAST_M) | VPINT_ALLOC_PCI_VALID_M);
+ wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg);
+
+ /* map the interrupts to its functions */
+ for (v = pf_based_first_msix; v <= pf_based_last_msix; v++) {
+ reg = (((device_based_vf_id << GLINT_VECT2FUNC_VF_NUM_S) &
+ GLINT_VECT2FUNC_VF_NUM_M) |
+ ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
+ GLINT_VECT2FUNC_PF_NUM_M));
+ wr32(hw, GLINT_VECT2FUNC(v), reg);
+ }
+
+ /* Map mailbox interrupt to VF MSI-X vector 0 */
+ wr32(hw, VPINT_MBX_CTL(device_based_vf_id), VPINT_MBX_CTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_ena_vf_q_mappings - enable Rx/Tx queue mappings for a VF
+ * @vf: VF to enable the mappings for
+ * @max_txq: max Tx queues allowed on the VF's VSI
+ * @max_rxq: max Rx queues allowed on the VF's VSI
+ */
+static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+ struct ice_hw *hw = &vf->pf->hw;
+ u32 reg;
+
+ if (WARN_ON(!vsi))
+ return;
+
+ /* set regardless of mapping mode */
+ wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M);
+
+ /* VF Tx queues allocation */
+ if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) {
+ /* set the VF PF Tx queue range
+ * VFNUMQ value should be set to (number of queues - 1). A value
+ * of 0 means 1 queue and a value of 255 means 256 queues
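+ * (e.g. a hypothetical max_txq of 4 is programmed as VFNUMQ = 3)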
+ */
+ reg = (((vsi->txq_map[0] << VPLAN_TX_QBASE_VFFIRSTQ_S) &
+ VPLAN_TX_QBASE_VFFIRSTQ_M) |
+ (((max_txq - 1) << VPLAN_TX_QBASE_VFNUMQ_S) &
+ VPLAN_TX_QBASE_VFNUMQ_M));
+ wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg);
+ } else {
+ dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
+ }
+
+ /* set regardless of mapping mode */
+ wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), VPLAN_RXQ_MAPENA_RX_ENA_M);
+
+ /* VF Rx queues allocation */
+ if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) {
+ /* set the VF PF Rx queue range
+ * VFNUMQ value should be set to (number of queues - 1). A value
+ * of 0 means 1 queue and a value of 255 means 256 queues
+ */
+ reg = (((vsi->rxq_map[0] << VPLAN_RX_QBASE_VFFIRSTQ_S) &
+ VPLAN_RX_QBASE_VFFIRSTQ_M) |
+ (((max_rxq - 1) << VPLAN_RX_QBASE_VFNUMQ_S) &
+ VPLAN_RX_QBASE_VFNUMQ_M));
+ wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg);
+ } else {
+ dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
+ }
+}
+
+/**
+ * ice_ena_vf_mappings - enable VF MSIX and queue mapping
+ * @vf: pointer to the VF structure
+ */
+static void ice_ena_vf_mappings(struct ice_vf *vf)
+{
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+ if (WARN_ON(!vsi))
+ return;
+
+ ice_ena_vf_msix_mappings(vf);
+ ice_ena_vf_q_mappings(vf, vsi->alloc_txq, vsi->alloc_rxq);
+}
+
+/**
+ * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space
+ * @vf: VF to calculate the register index for
+ * @q_vector: a q_vector associated to the VF
+ */
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
+{
+ struct ice_pf *pf;
+
+ if (!vf || !q_vector)
+ return -EINVAL;
+
+ pf = vf->pf;
+
+ /* always add one to account for the OICR being the first MSIX */
+ return pf->sriov_base_vector + pf->vfs.num_msix_per * vf->vf_id +
+ q_vector->v_idx + 1;
+}
+
+/**
+ * ice_get_max_valid_res_idx - Get the max valid resource index
+ * @res: pointer to the resource to find the max valid index for
+ *
+ * Search backward from the end of the ice_res_tracker and return the index of
+ * the first res->list entry found with the ICE_RES_VALID_BIT set. This
+ * function is only valid for SR-IOV because SR-IOV is the only consumer that
+ * manipulates res->end, and it is always called while res->end is set to
+ * res->num_entries.
+ */
+static int ice_get_max_valid_res_idx(struct ice_res_tracker *res)
+{
+ int i;
+
+ if (!res)
+ return -EINVAL;
+
+ for (i = res->num_entries - 1; i >= 0; i--)
+ if (res->list[i] & ICE_RES_VALID_BIT)
+ return i;
+
+ return 0;
+}
+
+/**
+ * ice_sriov_set_msix_res - Set any used MSIX resources
+ * @pf: pointer to PF structure
+ * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs
+ *
+ * This function allows SR-IOV resources to be taken from the end of the PF's
+ * allowed HW MSIX vectors so that the irq_tracker will not be affected. We
+ * just set the pf->sriov_base_vector and return success.
+ *
+ * If there are not enough resources available, return an error. This should
+ * always be caught by ice_set_per_vf_res().
+ *
+ * Return 0 on success, and -EINVAL when there are not enough MSIX vectors
+ * in the PF's space available for SR-IOV.
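+ *
+ * For example (hypothetical values): with 1024 total vectors and 96 vectors
+ * needed for SR-IOV, sriov_base_vector becomes 1024 - 96 = 928, which is
+ * valid as long as the irq_tracker currently uses no more than 928 entries.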
+ */
+static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
+{
+ u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
+ int vectors_used = pf->irq_tracker->num_entries;
+ int sriov_base_vector;
+
+ sriov_base_vector = total_vectors - num_msix_needed;
+
+ /* make sure we only grab irq_tracker entries from the list end and
+ * that we have enough available MSIX vectors
+ */
+ if (sriov_base_vector < vectors_used)
+ return -EINVAL;
+
+ pf->sriov_base_vector = sriov_base_vector;
+
+ return 0;
+}
+
+/**
+ * ice_set_per_vf_res - check if vectors and queues are available
+ * @pf: pointer to the PF structure
+ * @num_vfs: the number of SR-IOV VFs being configured
+ *
+ * First, determine HW interrupts from the common pool. If we allocate fewer
+ * VFs, we get more vectors and can enable more queues per VF. Note that this
+ * does not grab any vectors from the SW pool already allocated. Also note
+ * that all vector counts include one for each VF's miscellaneous interrupt
+ * vector (i.e. OICR).
+ *
+ * Minimum VFs - 2 vectors, 1 queue pair
+ * Small VFs - 5 vectors, 4 queue pairs
+ * Medium VFs - 17 vectors, 16 queue pairs
+ *
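+ * For example (hypothetical numbers): with 40 MSI-X vectors left for SR-IOV
+ * and 8 VFs, each VF could get 40 / 8 = 5 vectors, which lands in the small
+ * tier (5 vectors, 4 queue pairs).
+ *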
+ * Second, determine number of queue pairs per VF by starting with a pre-defined
+ * maximum each VF supports. If this is not possible, then we adjust based on
+ * queue pairs available on the device.
+ *
+ * Lastly, set queue and MSI-X VF variables tracked by the PF so it can be used
+ * by each VF during VF initialization and reset.
+ */
+static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
+{
+ int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
+ u16 num_msix_per_vf, num_txq, num_rxq, avail_qs;
+ int msix_avail_per_vf, msix_avail_for_sriov;
+ struct device *dev = ice_pf_to_dev(pf);
+ int err;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ if (!num_vfs)
+ return -EINVAL;
+
+ if (max_valid_res_idx < 0)
+ return -ENOSPC;
+
+ /* determine MSI-X resources per VF */
+ msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors -
+ pf->irq_tracker->num_entries;
+ msix_avail_per_vf = msix_avail_for_sriov / num_vfs;
+ if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) {
+ num_msix_per_vf = ICE_NUM_VF_MSIX_MED;
+ } else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_SMALL) {
+ num_msix_per_vf = ICE_NUM_VF_MSIX_SMALL;
+ } else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MULTIQ_MIN) {
+ num_msix_per_vf = ICE_NUM_VF_MSIX_MULTIQ_MIN;
+ } else if (msix_avail_per_vf >= ICE_MIN_INTR_PER_VF) {
+ num_msix_per_vf = ICE_MIN_INTR_PER_VF;
+ } else {
+ dev_err(dev, "Only %d MSI-X interrupts available for SR-IOV. Not enough to support minimum of %d MSI-X interrupts per VF for %d VFs\n",
+ msix_avail_for_sriov, ICE_MIN_INTR_PER_VF,
+ num_vfs);
+ return -ENOSPC;
+ }
+
+ num_txq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF,
+ ICE_MAX_RSS_QS_PER_VF);
+ avail_qs = ice_get_avail_txq_count(pf) / num_vfs;
+ if (!avail_qs)
+ num_txq = 0;
+ else if (num_txq > avail_qs)
+ num_txq = rounddown_pow_of_two(avail_qs);
+
+ num_rxq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF,
+ ICE_MAX_RSS_QS_PER_VF);
+ avail_qs = ice_get_avail_rxq_count(pf) / num_vfs;
+ if (!avail_qs)
+ num_rxq = 0;
+ else if (num_rxq > avail_qs)
+ num_rxq = rounddown_pow_of_two(avail_qs);
+
+ if (num_txq < ICE_MIN_QS_PER_VF || num_rxq < ICE_MIN_QS_PER_VF) {
+ dev_err(dev, "Not enough queues to support minimum of %d queue pairs per VF for %d VFs\n",
+ ICE_MIN_QS_PER_VF, num_vfs);
+ return -ENOSPC;
+ }
+
+ err = ice_sriov_set_msix_res(pf, num_msix_per_vf * num_vfs);
+ if (err) {
+ dev_err(dev, "Unable to set MSI-X resources for %d VFs, err %d\n",
+ num_vfs, err);
+ return err;
+ }
+
+ /* only allow equal Tx/Rx queue count (i.e. queue pairs) */
+ pf->vfs.num_qps_per = min_t(int, num_txq, num_rxq);
+ pf->vfs.num_msix_per = num_msix_per_vf;
+ dev_info(dev, "Enabling %d VFs with %d vectors and %d queues per VF\n",
+ num_vfs, pf->vfs.num_msix_per, pf->vfs.num_qps_per);
+
+ return 0;
+}
+
+/**
+ * ice_init_vf_vsi_res - initialize/setup VF VSI resources
+ * @vf: VF to initialize/setup the VSI for
+ *
+ * This function creates a VSI for the VF, adds a VLAN 0 filter, and sets up
+ * the VF VSI's broadcast filter. It is only used during initial VF creation.
+ */
+static int ice_init_vf_vsi_res(struct ice_vf *vf)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+ struct ice_pf *pf = vf->pf;
+ u8 broadcast[ETH_ALEN];
+ struct ice_vsi *vsi;
+ struct device *dev;
+ int err;
+
+ vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
+
+ dev = ice_pf_to_dev(pf);
+ vsi = ice_vf_vsi_setup(vf);
+ if (!vsi)
+ return -ENOMEM;
+
+ err = ice_vsi_add_vlan_zero(vsi);
+ if (err) {
+ dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n",
+ vf->vf_id);
+ goto release_vsi;
+ }
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ err = vlan_ops->ena_rx_filtering(vsi);
+ if (err) {
+ dev_warn(dev, "Failed to enable Rx VLAN filtering for VF %d\n",
+ vf->vf_id);
+ goto release_vsi;
+ }
+
+ eth_broadcast_addr(broadcast);
+ err = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
+ if (err) {
+ dev_err(dev, "Failed to add broadcast MAC filter for VF %d, error %d\n",
+ vf->vf_id, err);
+ goto release_vsi;
+ }
+
+ err = ice_vsi_apply_spoofchk(vsi, vf->spoofchk);
+ if (err) {
+ dev_warn(dev, "Failed to initialize spoofchk setting for VF %d\n",
+ vf->vf_id);
+ goto release_vsi;
+ }
+
+ vf->num_mac = 1;
+
+ return 0;
+
+release_vsi:
+ ice_vf_vsi_release(vf);
+ return err;
+}
+
+/**
+ * ice_start_vfs - start VFs so they are ready to be used by SR-IOV
+ * @pf: PF the VFs are associated with
+ */
+static int ice_start_vfs(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ unsigned int bkt, it_cnt;
+ struct ice_vf *vf;
+ int retval;
+
+ lockdep_assert_held(&pf->vfs.table_lock);
+
+ it_cnt = 0;
+ ice_for_each_vf(pf, bkt, vf) {
+ vf->vf_ops->clear_reset_trigger(vf);
+
+ retval = ice_init_vf_vsi_res(vf);
+ if (retval) {
+ dev_err(ice_pf_to_dev(pf), "Failed to initialize VSI resources for VF %d, error %d\n",
+ vf->vf_id, retval);
+ goto teardown;
+ }
+
+ set_bit(ICE_VF_STATE_INIT, vf->vf_states);
+ ice_ena_vf_mappings(vf);
+ wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
+ it_cnt++;
+ }
+
+ ice_flush(hw);
+ return 0;
+
+teardown:
+ ice_for_each_vf(pf, bkt, vf) {
+ if (it_cnt == 0)
+ break;
+
+ ice_dis_vf_mappings(vf);
+ ice_vf_vsi_release(vf);
+ it_cnt--;
+ }
+
+ return retval;
+}
+
+/**
+ * ice_sriov_free_vf - Free VF memory after all references are dropped
+ * @vf: pointer to VF to free
+ *
+ * Called by ice_put_vf through ice_release_vf once the last reference to a VF
+ * structure has been dropped.
+ */
+static void ice_sriov_free_vf(struct ice_vf *vf)
+{
+ mutex_destroy(&vf->cfg_lock);
+
+ kfree_rcu(vf, rcu);
+}
+
+/**
+ * ice_sriov_clear_reset_state - clears VF Reset status register
+ * @vf: the VF to configure
+ */
+static void ice_sriov_clear_reset_state(struct ice_vf *vf)
+{
+ struct ice_hw *hw = &vf->pf->hw;
+
+ /* Clear the reset status register so that the VF immediately sees that
+ * the device is resetting, even if hardware hasn't yet gotten around
+ * to clearing VFGEN_RSTAT for us.
+ */
+ wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_INPROGRESS);
+}
+
+/**
+ * ice_sriov_clear_mbx_register - clears SRIOV VF's mailbox registers
+ * @vf: the VF to configure
+ */
+static void ice_sriov_clear_mbx_register(struct ice_vf *vf)
+{
+ struct ice_pf *pf = vf->pf;
+
+ wr32(&pf->hw, VF_MBX_ARQLEN(vf->vf_id), 0);
+ wr32(&pf->hw, VF_MBX_ATQLEN(vf->vf_id), 0);
+}
+
+/**
+ * ice_sriov_trigger_reset_register - trigger VF reset for SRIOV VF
+ * @vf: pointer to VF structure
+ * @is_vflr: true if reset occurred due to VFLR
+ *
+ * Trigger and cleanup after a VF reset for a SR-IOV VF.
+ */
+static void ice_sriov_trigger_reset_register(struct ice_vf *vf, bool is_vflr)
+{
+ struct ice_pf *pf = vf->pf;
+ u32 reg, reg_idx, bit_idx;
+ unsigned int vf_abs_id, i;
+ struct device *dev;
+ struct ice_hw *hw;
+
+ dev = ice_pf_to_dev(pf);
+ hw = &pf->hw;
+ vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+ /* In the case of a VFLR, HW has already reset the VF and we just need
+ * to clean up. Otherwise we must first trigger the reset using the
+ * VFRTRIG register.
+ */
+ if (!is_vflr) {
+ reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
+ reg |= VPGEN_VFRTRIG_VFSWR_M;
+ wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg);
+ }
+
+ /* clear the VFLR bit in GLGEN_VFLRSTAT */
+ reg_idx = (vf_abs_id) / 32;
+ bit_idx = (vf_abs_id) % 32;
+ wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+ ice_flush(hw);
+
+ wr32(hw, PF_PCI_CIAA,
+ VF_DEVICE_STATUS | (vf_abs_id << PF_PCI_CIAA_VF_NUM_S));
+ for (i = 0; i < ICE_PCI_CIAD_WAIT_COUNT; i++) {
+ reg = rd32(hw, PF_PCI_CIAD);
+ /* no transactions pending so stop polling */
+ if ((reg & VF_TRANS_PENDING_M) == 0)
+ break;
+
+ dev_err(dev, "VF %u PCI transactions stuck\n", vf->vf_id);
+ udelay(ICE_PCI_CIAD_WAIT_DELAY_US);
+ }
+}
+
+/**
+ * ice_sriov_poll_reset_status - poll SRIOV VF reset status
+ * @vf: pointer to VF structure
+ *
+ * Returns true when reset is successful, else returns false
+ */
+static bool ice_sriov_poll_reset_status(struct ice_vf *vf)
+{
+ struct ice_pf *pf = vf->pf;
+ unsigned int i;
+ u32 reg;
+
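+ /* up to 10 polls with 10-20 us sleeps, i.e. a worst-case wait on the
+ * order of 200 us before giving up (approximate figure)
+ */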
+ for (i = 0; i < 10; i++) {
+ /* VF reset requires driver to first reset the VF and then
+ * poll the status register to make sure that the reset
+ * completed successfully.
+ */
+ reg = rd32(&pf->hw, VPGEN_VFRSTAT(vf->vf_id));
+ if (reg & VPGEN_VFRSTAT_VFRD_M)
+ return true;
+
+ /* only sleep if the reset is not done */
+ usleep_range(10, 20);
+ }
+ return false;
+}
+
+/**
+ * ice_sriov_clear_reset_trigger - enable VF to access hardware
+ * @vf: VF to enable hardware access for
+ */
+static void ice_sriov_clear_reset_trigger(struct ice_vf *vf)
+{
+ struct ice_hw *hw = &vf->pf->hw;
+ u32 reg;
+
+ reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
+ reg &= ~VPGEN_VFRTRIG_VFSWR_M;
+ wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg);
+ ice_flush(hw);
+}
+
+/**
+ * ice_sriov_vsi_rebuild - release and rebuild VF's VSI
+ * @vf: VF to release and setup the VSI for
+ *
+ * This is only called when a single VF is being reset (i.e. VFR, VFLR, host VF
+ * configuration change, etc.).
+ */
+static int ice_sriov_vsi_rebuild(struct ice_vf *vf)
+{
+ struct ice_pf *pf = vf->pf;
+
+ ice_vf_vsi_release(vf);
+ if (!ice_vf_vsi_setup(vf)) {
+ dev_err(ice_pf_to_dev(pf),
+ "Failed to release and setup the VF%u's VSI\n",
+ vf->vf_id);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_sriov_post_vsi_rebuild - tasks to do after the VF's VSI has been rebuilt
+ * @vf: VF to perform tasks on
+ */
+static void ice_sriov_post_vsi_rebuild(struct ice_vf *vf)
+{
+ ice_vf_rebuild_host_cfg(vf);
+ ice_vf_set_initialized(vf);
+ ice_ena_vf_mappings(vf);
+ wr32(&vf->pf->hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
+}
+
+static const struct ice_vf_ops ice_sriov_vf_ops = {
+ .reset_type = ICE_VF_RESET,
+ .free = ice_sriov_free_vf,
+ .clear_reset_state = ice_sriov_clear_reset_state,
+ .clear_mbx_register = ice_sriov_clear_mbx_register,
+ .trigger_reset_register = ice_sriov_trigger_reset_register,
+ .poll_reset_status = ice_sriov_poll_reset_status,
+ .clear_reset_trigger = ice_sriov_clear_reset_trigger,
+ .vsi_rebuild = ice_sriov_vsi_rebuild,
+ .post_vsi_rebuild = ice_sriov_post_vsi_rebuild,
+};
+
+/**
+ * ice_create_vf_entries - Allocate and insert VF entries
+ * @pf: pointer to the PF structure
+ * @num_vfs: the number of VFs to allocate
+ *
+ * Allocate new VF entries and insert them into the hash table. Set some
+ * basic default fields for initializing the new VFs.
+ *
+ * After this function exits, the hash table will have num_vfs entries
+ * inserted.
+ *
+ * Returns 0 on success or an integer error code on failure.
+ */
+static int ice_create_vf_entries(struct ice_pf *pf, u16 num_vfs)
+{
+ struct ice_vfs *vfs = &pf->vfs;
+ struct ice_vf *vf;
+ u16 vf_id;
+ int err;
+
+ lockdep_assert_held(&vfs->table_lock);
+
+ for (vf_id = 0; vf_id < num_vfs; vf_id++) {
+ vf = kzalloc(sizeof(*vf), GFP_KERNEL);
+ if (!vf) {
+ err = -ENOMEM;
+ goto err_free_entries;
+ }
+ kref_init(&vf->refcnt);
+
+ vf->pf = pf;
+ vf->vf_id = vf_id;
+
+ /* set sriov vf ops for VFs created during SRIOV flow */
+ vf->vf_ops = &ice_sriov_vf_ops;
+
+ vf->vf_sw_id = pf->first_sw;
+ /* assign default capabilities */
+ vf->spoofchk = true;
+ vf->num_vf_qs = pf->vfs.num_qps_per;
+ ice_vc_set_default_allowlist(vf);
+
+ /* ctrl_vsi_idx will be set to a valid value only when VF
+ * creates its first fdir rule.
+ */
+ ice_vf_ctrl_invalidate_vsi(vf);
+ ice_vf_fdir_init(vf);
+
+ ice_virtchnl_set_dflt_ops(vf);
+
+ mutex_init(&vf->cfg_lock);
+
+ hash_add_rcu(vfs->table, &vf->entry, vf_id);
+ }
+
+ return 0;
+
+err_free_entries:
+ ice_free_vf_entries(pf);
+ return err;
+}
+
+/**
+ * ice_ena_vfs - enable VFs so they are ready to be used
+ * @pf: pointer to the PF structure
+ * @num_vfs: number of VFs to enable
+ */
+static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ int ret;
+
+ /* Disable global interrupt 0 so we don't try to handle the VFLR. */
+ wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
+ ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
+ set_bit(ICE_OICR_INTR_DIS, pf->state);
+ ice_flush(hw);
+
+ ret = pci_enable_sriov(pf->pdev, num_vfs);
+ if (ret)
+ goto err_unroll_intr;
+
+ mutex_lock(&pf->vfs.table_lock);
+
+ ret = ice_set_per_vf_res(pf, num_vfs);
+ if (ret) {
+ dev_err(dev, "Not enough resources for %d VFs, err %d. Try with fewer number of VFs\n",
+ num_vfs, ret);
+ goto err_unroll_sriov;
+ }
+
+ ret = ice_create_vf_entries(pf, num_vfs);
+ if (ret) {
+ dev_err(dev, "Failed to allocate VF entries for %d VFs\n",
+ num_vfs);
+ goto err_unroll_sriov;
+ }
+
+ ret = ice_start_vfs(pf);
+ if (ret) {
+ dev_err(dev, "Failed to start %d VFs, err %d\n", num_vfs, ret);
+ ret = -EAGAIN;
+ goto err_unroll_vf_entries;
+ }
+
+ clear_bit(ICE_VF_DIS, pf->state);
+
+ ret = ice_eswitch_configure(pf);
+ if (ret) {
+ dev_err(dev, "Failed to configure eswitch, err %d\n", ret);
+ goto err_unroll_sriov;
+ }
+
+ /* rearm global interrupts */
+ if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state))
+ ice_irq_dynamic_ena(hw, NULL, NULL);
+
+ mutex_unlock(&pf->vfs.table_lock);
+
+ return 0;
+
+err_unroll_vf_entries:
+ ice_free_vf_entries(pf);
+err_unroll_sriov:
+ mutex_unlock(&pf->vfs.table_lock);
+ pci_disable_sriov(pf->pdev);
+err_unroll_intr:
+ /* rearm interrupts here */
+ ice_irq_dynamic_ena(hw, NULL, NULL);
+ clear_bit(ICE_OICR_INTR_DIS, pf->state);
+ return ret;
+}
+
+/**
+ * ice_pci_sriov_ena - Enable or change number of VFs
+ * @pf: pointer to the PF structure
+ * @num_vfs: number of VFs to allocate
+ *
+ * Returns 0 on success and negative on failure
+ */
+static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
+{
+ int pre_existing_vfs = pci_num_vf(pf->pdev);
+ struct device *dev = ice_pf_to_dev(pf);
+ int err;
+
+ if (pre_existing_vfs && pre_existing_vfs != num_vfs)
+ ice_free_vfs(pf);
+ else if (pre_existing_vfs && pre_existing_vfs == num_vfs)
+ return 0;
+
+ if (num_vfs > pf->vfs.num_supported) {
+ dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n",
+ num_vfs, pf->vfs.num_supported);
+ return -EOPNOTSUPP;
+ }
+
+ dev_info(dev, "Enabling %d VFs\n", num_vfs);
+ err = ice_ena_vfs(pf, num_vfs);
+ if (err) {
+ dev_err(dev, "Failed to enable SR-IOV: %d\n", err);
+ return err;
+ }
+
+ set_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
+ return 0;
+}
+
+/**
+ * ice_check_sriov_allowed - check if SR-IOV can be enabled on this device
+ * @pf: PF to enable SR-IOV on
+ */
+static int ice_check_sriov_allowed(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+
+ if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) {
+ dev_err(dev, "This device is not capable of SR-IOV\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (ice_is_safe_mode(pf)) {
+ dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (!ice_pf_state_is_nominal(pf)) {
+ dev_err(dev, "Cannot enable SR-IOV, device not ready\n");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_sriov_configure - Enable or change number of VFs via sysfs
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of VFs to allocate or 0 to free VFs
+ *
+ * This function is called when the user updates the number of VFs in sysfs. On
+ * success return whatever num_vfs was set to by the caller. Return negative on
+ * failure.
+ */
+int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ struct ice_pf *pf = pci_get_drvdata(pdev);
+ struct device *dev = ice_pf_to_dev(pf);
+ int err;
+
+ err = ice_check_sriov_allowed(pf);
+ if (err)
+ return err;
+
+ if (!num_vfs) {
+ if (!pci_vfs_assigned(pdev)) {
+ ice_free_vfs(pf);
+ ice_mbx_deinit_snapshot(&pf->hw);
+ if (pf->lag)
+ ice_enable_lag(pf->lag);
+ return 0;
+ }
+
+ dev_err(dev, "can't free VFs because some are assigned to VMs.\n");
+ return -EBUSY;
+ }
+
+ err = ice_mbx_init_snapshot(&pf->hw, num_vfs);
+ if (err)
+ return err;
+
+ err = ice_pci_sriov_ena(pf, num_vfs);
+ if (err) {
+ ice_mbx_deinit_snapshot(&pf->hw);
+ return err;
+ }
+
+ if (pf->lag)
+ ice_disable_lag(pf->lag);
+ return num_vfs;
+}
+
+/**
+ * ice_process_vflr_event - Free VF resources via IRQ calls
+ * @pf: pointer to the PF structure
+ *
+ * Called from the VFLR IRQ handler to free up VF resources and state
+ * variables.
+ */
+void ice_process_vflr_event(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ struct ice_vf *vf;
+ unsigned int bkt;
+ u32 reg;
+
+ if (!test_and_clear_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
+ !ice_has_vfs(pf))
+ return;
+
+ mutex_lock(&pf->vfs.table_lock);
+ ice_for_each_vf(pf, bkt, vf) {
+ u32 reg_idx, bit_idx;
+
+ reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32;
+ bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32;
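+ /* e.g., a hypothetical vf_base_id of 64 and vf_id 5 give absolute
+ * VF 69, i.e. GLGEN_VFLRSTAT register 2, bit 5
+ */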
+ /* read GLGEN_VFLRSTAT register to find out the flr VFs */
+ reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx));
+ if (reg & BIT(bit_idx))
+ /* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */
+ ice_reset_vf(vf, ICE_VF_RESET_VFLR | ICE_VF_RESET_LOCK);
+ }
+ mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_get_vf_from_pfq - get the VF who owns the PF space queue passed in
+ * @pf: PF used to index all VFs
+ * @pfq: queue index relative to the PF's function space
+ *
+ * If no VF owns the pfq, return NULL; otherwise return a pointer to the VF
+ * that owns it.
+ *
+ * If this function returns non-NULL, it acquires a reference count of the VF
+ * structure. The caller is responsible for calling ice_put_vf() to drop this
+ * reference.
+ */
+static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ rcu_read_lock();
+ ice_for_each_vf_rcu(pf, bkt, vf) {
+ struct ice_vsi *vsi;
+ u16 rxq_idx;
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi)
+ continue;
+
+ ice_for_each_rxq(vsi, rxq_idx)
+ if (vsi->rxq_map[rxq_idx] == pfq) {
+ struct ice_vf *found;
+
+ if (kref_get_unless_zero(&vf->refcnt))
+ found = vf;
+ else
+ found = NULL;
+ rcu_read_unlock();
+ return found;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+/**
+ * ice_globalq_to_pfq - convert from global queue index to PF space queue index
+ * @pf: PF used for conversion
+ * @globalq: global queue index used to convert to PF space queue index
+ */
+static u32 ice_globalq_to_pfq(struct ice_pf *pf, u32 globalq)
+{
+ return globalq - pf->hw.func_caps.common_cap.rxq_first_id;
+}
+
+/**
+ * ice_vf_lan_overflow_event - handle LAN overflow event for a VF
+ * @pf: PF that the LAN overflow event happened on
+ * @event: structure holding the event information for the LAN overflow event
+ *
+ * Determine if the LAN overflow event was caused by a VF queue. If it was not
+ * caused by a VF, do nothing. If a VF caused this LAN overflow event, trigger
+ * a reset on the offending VF.
+ */
+void
+ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event)
+{
+ u32 gldcb_rtctq, queue;
+ struct ice_vf *vf;
+
+ gldcb_rtctq = le32_to_cpu(event->desc.params.lan_overflow.prtdcb_ruptq);
+ dev_dbg(ice_pf_to_dev(pf), "GLDCB_RTCTQ: 0x%08x\n", gldcb_rtctq);
+
+ /* event returns device global Rx queue number */
+ queue = (gldcb_rtctq & GLDCB_RTCTQ_RXQNUM_M) >>
+ GLDCB_RTCTQ_RXQNUM_S;
+
+ vf = ice_get_vf_from_pfq(pf, ice_globalq_to_pfq(pf, queue));
+ if (!vf)
+ return;
+
+ ice_reset_vf(vf, ICE_VF_RESET_NOTIFY | ICE_VF_RESET_LOCK);
+ ice_put_vf(vf);
+}
+
+/**
+ * ice_set_vf_spoofchk
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @ena: flag to enable or disable feature
+ *
+ * Enable or disable VF spoof checking
+ */
+int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+ struct ice_vsi *vf_vsi;
+ struct device *dev;
+ struct ice_vf *vf;
+ int ret;
+
+ dev = ice_pf_to_dev(pf);
+
+ vf = ice_get_vf_by_id(pf, vf_id);
+ if (!vf)
+ return -EINVAL;
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ goto out_put_vf;
+
+ vf_vsi = ice_get_vf_vsi(vf);
+ if (!vf_vsi) {
+ netdev_err(netdev, "VSI %d for VF %d is null\n",
+ vf->lan_vsi_idx, vf->vf_id);
+ ret = -EINVAL;
+ goto out_put_vf;
+ }
+
+ if (vf_vsi->type != ICE_VSI_VF) {
+ netdev_err(netdev, "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n",
+ vf_vsi->type, vf_vsi->vsi_num, vf->vf_id);
+ ret = -ENODEV;
+ goto out_put_vf;
+ }
+
+ if (ena == vf->spoofchk) {
+ dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF");
+ ret = 0;
+ goto out_put_vf;
+ }
+
+ ret = ice_vsi_apply_spoofchk(vf_vsi, ena);
+ if (ret)
+ dev_err(dev, "Failed to set spoofchk %s for VF %d VSI %d\n error %d\n",
+ ena ? "ON" : "OFF", vf->vf_id, vf_vsi->vsi_num, ret);
+ else
+ vf->spoofchk = ena;
+
+out_put_vf:
+ ice_put_vf(vf);
+ return ret;
+}
+
+/**
+ * ice_get_vf_cfg
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @ivi: VF configuration structure
+ *
+ * return VF configuration
+ */
+int
+ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_vf *vf;
+ int ret;
+
+ vf = ice_get_vf_by_id(pf, vf_id);
+ if (!vf)
+ return -EINVAL;
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ goto out_put_vf;
+
+ ivi->vf = vf_id;
+ ether_addr_copy(ivi->mac, vf->hw_lan_addr.addr);
+
+ /* VF configuration for VLAN and applicable QoS */
+ ivi->vlan = ice_vf_get_port_vlan_id(vf);
+ ivi->qos = ice_vf_get_port_vlan_prio(vf);
+ if (ice_vf_is_port_vlan_ena(vf))
+ ivi->vlan_proto = cpu_to_be16(ice_vf_get_port_vlan_tpid(vf));
+
+ ivi->trusted = vf->trusted;
+ ivi->spoofchk = vf->spoofchk;
+ if (!vf->link_forced)
+ ivi->linkstate = IFLA_VF_LINK_STATE_AUTO;
+ else if (vf->link_up)
+ ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
+ else
+ ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
+ ivi->max_tx_rate = vf->max_tx_rate;
+ ivi->min_tx_rate = vf->min_tx_rate;
+
+out_put_vf:
+ ice_put_vf(vf);
+ return ret;
+}
+
+/**
+ * ice_set_vf_mac
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @mac: MAC address
+ *
+ * program VF MAC address
+ */
+int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_vf *vf;
+ int ret;
+
+ if (is_multicast_ether_addr(mac)) {
+ netdev_err(netdev, "%pM not a valid unicast address\n", mac);
+ return -EINVAL;
+ }
+
+ vf = ice_get_vf_by_id(pf, vf_id);
+ if (!vf)
+ return -EINVAL;
+
+ /* nothing left to do, unicast MAC already set */
+ if (ether_addr_equal(vf->dev_lan_addr.addr, mac) &&
+ ether_addr_equal(vf->hw_lan_addr.addr, mac)) {
+ ret = 0;
+ goto out_put_vf;
+ }
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ goto out_put_vf;
+
+ mutex_lock(&vf->cfg_lock);
+
+ /* VF is notified of its new MAC via the PF's response to the
+ * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
+ */
+ ether_addr_copy(vf->dev_lan_addr.addr, mac);
+ ether_addr_copy(vf->hw_lan_addr.addr, mac);
+ if (is_zero_ether_addr(mac)) {
+ /* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */
+ vf->pf_set_mac = false;
+ netdev_info(netdev, "Removing MAC on VF %d. VF driver will be reinitialized\n",
+ vf->vf_id);
+ } else {
+ /* PF will add MAC rule for the VF */
+ vf->pf_set_mac = true;
+ netdev_info(netdev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n",
+ mac, vf_id);
+ }
+
+ ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
+ mutex_unlock(&vf->cfg_lock);
+
+out_put_vf:
+ ice_put_vf(vf);
+ return ret;
+}
+
+/**
+ * ice_set_vf_trust
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @trusted: Boolean value to enable/disable trusted VF
+ *
+ * Enable or disable a given VF as trusted
+ */
+int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_vf *vf;
+ int ret;
+
+ vf = ice_get_vf_by_id(pf, vf_id);
+ if (!vf)
+ return -EINVAL;
+
+ if (ice_is_eswitch_mode_switchdev(pf)) {
+ dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n");
+ /* drop the reference taken by ice_get_vf_by_id() */
+ ret = -EOPNOTSUPP;
+ goto out_put_vf;
+ }
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ goto out_put_vf;
+
+ /* Check if already trusted */
+ if (trusted == vf->trusted) {
+ ret = 0;
+ goto out_put_vf;
+ }
+
+ mutex_lock(&vf->cfg_lock);
+
+ vf->trusted = trusted;
+ ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
+ dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
+ vf_id, trusted ? "" : "un");
+
+ mutex_unlock(&vf->cfg_lock);
+
+out_put_vf:
+ ice_put_vf(vf);
+ return ret;
+}
+
+/**
+ * ice_set_vf_link_state
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @link_state: required link state
+ *
+ * Set VF's link state, irrespective of physical link state status
+ */
+int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_vf *vf;
+ int ret;
+
+ vf = ice_get_vf_by_id(pf, vf_id);
+ if (!vf)
+ return -EINVAL;
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ goto out_put_vf;
+
+ switch (link_state) {
+ case IFLA_VF_LINK_STATE_AUTO:
+ vf->link_forced = false;
+ break;
+ case IFLA_VF_LINK_STATE_ENABLE:
+ vf->link_forced = true;
+ vf->link_up = true;
+ break;
+ case IFLA_VF_LINK_STATE_DISABLE:
+ vf->link_forced = true;
+ vf->link_up = false;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out_put_vf;
+ }
+
+ ice_vc_notify_vf_link_state(vf);
+
+out_put_vf:
+ ice_put_vf(vf);
+ return ret;
+}
+
+/**
+ * ice_calc_all_vfs_min_tx_rate - calculate cumulative min Tx rate on all VFs
+ * @pf: PF associated with VFs
+ */
+static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+ int rate = 0;
+
+ rcu_read_lock();
+ ice_for_each_vf_rcu(pf, bkt, vf)
+ rate += vf->min_tx_rate;
+ rcu_read_unlock();
+
+ return rate;
+}
+
+/**
+ * ice_min_tx_rate_oversubscribed - check if min Tx rate causes oversubscription
+ * @vf: VF trying to configure min_tx_rate
+ * @min_tx_rate: min Tx rate in Mbps
+ *
+ * Check if the min_tx_rate being passed in will cause oversubscription of total
+ * min_tx_rate based on the current link speed and all other VFs configured
+ * min_tx_rate
+ *
+ * Return true if the passed min_tx_rate would cause oversubscription, else
+ * return false
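+ *
+ * For example (hypothetical numbers): on a 10000 Mbps link, if all VFs'
+ * min rates sum to 8500 Mbps (500 Mbps of which is this VF's current
+ * setting), a request for 2500 Mbps gives 8500 - 500 + 2500 = 10500 Mbps,
+ * which exceeds the link speed and is rejected.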
+ */
+static bool
+ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate)
+{
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+ int all_vfs_min_tx_rate;
+ int link_speed_mbps;
+
+ if (WARN_ON(!vsi))
+ return false;
+
+ link_speed_mbps = ice_get_link_speed_mbps(vsi);
+ all_vfs_min_tx_rate = ice_calc_all_vfs_min_tx_rate(vf->pf);
+
+ /* this VF's previous rate is being overwritten */
+ all_vfs_min_tx_rate -= vf->min_tx_rate;
+
+ if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) {
+ dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n",
+ min_tx_rate, vf->vf_id,
+ all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps,
+ link_speed_mbps);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_set_vf_bw - set min/max VF bandwidth
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @min_tx_rate: Minimum Tx rate in Mbps
+ * @max_tx_rate: Maximum Tx rate in Mbps
+ */
+int
+ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+ int max_tx_rate)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_vsi *vsi;
+ struct device *dev;
+ struct ice_vf *vf;
+ int ret;
+
+ dev = ice_pf_to_dev(pf);
+
+ vf = ice_get_vf_by_id(pf, vf_id);
+ if (!vf)
+ return -EINVAL;
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ goto out_put_vf;
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ ret = -EINVAL;
+ goto out_put_vf;
+ }
+
+ if (min_tx_rate && ice_is_dcb_active(pf)) {
+ dev_err(dev, "DCB on PF is currently enabled. VF min Tx rate limiting not allowed on this PF.\n");
+ ret = -EOPNOTSUPP;
+ goto out_put_vf;
+ }
+
+ if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) {
+ ret = -EINVAL;
+ goto out_put_vf;
+ }
+
+ if (vf->min_tx_rate != (unsigned int)min_tx_rate) {
+ ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000);
+ if (ret) {
+ dev_err(dev, "Unable to set min-tx-rate for VF %d\n",
+ vf->vf_id);
+ goto out_put_vf;
+ }
+
+ vf->min_tx_rate = min_tx_rate;
+ }
+
+ if (vf->max_tx_rate != (unsigned int)max_tx_rate) {
+ ret = ice_set_max_bw_limit(vsi, (u64)max_tx_rate * 1000);
+ if (ret) {
+ dev_err(dev, "Unable to set max-tx-rate for VF %d\n",
+ vf->vf_id);
+ goto out_put_vf;
+ }
+
+ vf->max_tx_rate = max_tx_rate;
+ }
+
+out_put_vf:
+ ice_put_vf(vf);
+ return ret;
+}
+
+/**
+ * ice_get_vf_stats - populate some stats for the VF
+ * @netdev: the netdev of the PF
+ * @vf_id: the host OS identifier (0-255)
+ * @vf_stats: pointer to the OS memory to be initialized
+ */
+int ice_get_vf_stats(struct net_device *netdev, int vf_id,
+ struct ifla_vf_stats *vf_stats)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_eth_stats *stats;
+ struct ice_vsi *vsi;
+ struct ice_vf *vf;
+ int ret;
+
+ vf = ice_get_vf_by_id(pf, vf_id);
+ if (!vf)
+ return -EINVAL;
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ goto out_put_vf;
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ ret = -EINVAL;
+ goto out_put_vf;
+ }
+
+ ice_update_eth_stats(vsi);
+ stats = &vsi->eth_stats;
+
+ memset(vf_stats, 0, sizeof(*vf_stats));
+
+ vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast +
+ stats->rx_multicast;
+ vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast +
+ stats->tx_multicast;
+ vf_stats->rx_bytes = stats->rx_bytes;
+ vf_stats->tx_bytes = stats->tx_bytes;
+ vf_stats->broadcast = stats->rx_broadcast;
+ vf_stats->multicast = stats->rx_multicast;
+ vf_stats->rx_dropped = stats->rx_discards;
+ vf_stats->tx_dropped = stats->tx_discards;
+
+out_put_vf:
+ ice_put_vf(vf);
+ return ret;
+}
+
+/**
+ * ice_is_supported_port_vlan_proto - make sure the vlan_proto is supported
+ * @hw: hardware structure used to check the VLAN mode
+ * @vlan_proto: VLAN TPID being checked
+ *
+ * If the device is configured in Double VLAN Mode (DVM), then both ETH_P_8021Q
+ * and ETH_P_8021AD are supported. If the device is configured in Single VLAN
+ * Mode (SVM), then only ETH_P_8021Q is supported.
+ */
+static bool
+ice_is_supported_port_vlan_proto(struct ice_hw *hw, u16 vlan_proto)
+{
+ bool is_supported = false;
+
+ switch (vlan_proto) {
+ case ETH_P_8021Q:
+ is_supported = true;
+ break;
+ case ETH_P_8021AD:
+ if (ice_is_dvm_ena(hw))
+ is_supported = true;
+ break;
+ }
+
+ return is_supported;
+}
+
+/**
+ * ice_set_vf_port_vlan
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @vlan_id: VLAN ID being set
+ * @qos: priority setting
+ * @vlan_proto: VLAN protocol
+ *
+ * program VF Port VLAN ID and/or QoS
+ */
+int
+ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
+ __be16 vlan_proto)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ u16 local_vlan_proto = ntohs(vlan_proto);
+ struct device *dev;
+ struct ice_vf *vf;
+ int ret;
+
+ dev = ice_pf_to_dev(pf);
+
+ if (vlan_id >= VLAN_N_VID || qos > 7) {
+ dev_err(dev, "Invalid Port VLAN parameters for VF %d, ID %d, QoS %d\n",
+ vf_id, vlan_id, qos);
+ return -EINVAL;
+ }
+
+ if (!ice_is_supported_port_vlan_proto(&pf->hw, local_vlan_proto)) {
+ dev_err(dev, "VF VLAN protocol 0x%04x is not supported\n",
+ local_vlan_proto);
+ return -EPROTONOSUPPORT;
+ }
+
+ vf = ice_get_vf_by_id(pf, vf_id);
+ if (!vf)
+ return -EINVAL;
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ goto out_put_vf;
+
+ if (ice_vf_get_port_vlan_prio(vf) == qos &&
+ ice_vf_get_port_vlan_tpid(vf) == local_vlan_proto &&
+ ice_vf_get_port_vlan_id(vf) == vlan_id) {
+ /* duplicate request, so just return success */
+ dev_dbg(dev, "Duplicate port VLAN %u, QoS %u, TPID 0x%04x request\n",
+ vlan_id, qos, local_vlan_proto);
+ ret = 0;
+ goto out_put_vf;
+ }
+
+ mutex_lock(&vf->cfg_lock);
+
+ vf->port_vlan_info = ICE_VLAN(local_vlan_proto, vlan_id, qos);
+ if (ice_vf_is_port_vlan_ena(vf))
+ dev_info(dev, "Setting VLAN %u, QoS %u, TPID 0x%04x on VF %d\n",
+ vlan_id, qos, local_vlan_proto, vf_id);
+ else
+ dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);
+
+ ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
+ mutex_unlock(&vf->cfg_lock);
+
+out_put_vf:
+ ice_put_vf(vf);
+ return ret;
+}
+
+/**
+ * ice_print_vf_rx_mdd_event - print VF Rx malicious driver detect event
+ * @vf: pointer to the VF structure
+ */
+void ice_print_vf_rx_mdd_event(struct ice_vf *vf)
+{
+ struct ice_pf *pf = vf->pf;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
+
+ dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n",
+ vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id,
+ vf->dev_lan_addr.addr,
+ test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)
+ ? "on" : "off");
+}
+
+/**
+ * ice_print_vfs_mdd_events - print VFs malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from ice_handle_mdd_event to rate limit and print VF MDD events.
+ */
+void ice_print_vfs_mdd_events(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ /* check that there are pending MDD events to print */
+ if (!test_and_clear_bit(ICE_MDD_VF_PRINT_PENDING, pf->state))
+ return;
+
+ /* VF MDD event logs are rate limited to one second intervals */
+ if (time_is_after_jiffies(pf->vfs.last_printed_mdd_jiffies + HZ * 1))
+ return;
+
+ pf->vfs.last_printed_mdd_jiffies = jiffies;
+
+ mutex_lock(&pf->vfs.table_lock);
+ ice_for_each_vf(pf, bkt, vf) {
+ /* only print Rx MDD event message if there are new events */
+ if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
+ vf->mdd_rx_events.last_printed =
+ vf->mdd_rx_events.count;
+ ice_print_vf_rx_mdd_event(vf);
+ }
+
+ /* only print Tx MDD event message if there are new events */
+ if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
+ vf->mdd_tx_events.last_printed =
+ vf->mdd_tx_events.count;
+
+ dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n",
+ vf->mdd_tx_events.count, hw->pf_id, vf->vf_id,
+ vf->dev_lan_addr.addr);
+ }
+ }
+ mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_restore_all_vfs_msi_state - restore VF MSI state after PF FLR
+ * @pdev: pointer to a pci_dev structure
+ *
+ * Called when recovering from a PF FLR to restore interrupt capability to
+ * the VFs.
+ */
+void ice_restore_all_vfs_msi_state(struct pci_dev *pdev)
+{
+ u16 vf_id;
+ int pos;
+
+ if (!pci_num_vf(pdev))
+ return;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ if (pos) {
+ struct pci_dev *vfdev;
+
+ pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID,
+ &vf_id);
+ vfdev = pci_get_device(pdev->vendor, vf_id, NULL);
+ while (vfdev) {
+ if (vfdev->is_virtfn && vfdev->physfn == pdev)
+ pci_restore_msi_state(vfdev);
+ vfdev = pci_get_device(pdev->vendor, vf_id,
+ vfdev);
+ }
+ }
+}
+
+/**
+ * ice_is_malicious_vf - helper function to detect a malicious VF
+ * @pf: ptr to struct ice_pf
+ * @event: pointer to the AQ event
+ * @num_msg_proc: the number of messages processed so far
+ * @num_msg_pending: the number of messages pending in the admin queue
+ */
+bool
+ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event,
+ u16 num_msg_proc, u16 num_msg_pending)
+{
+ s16 vf_id = le16_to_cpu(event->desc.retval);
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_mbx_data mbxdata;
+ bool malvf = false;
+ struct ice_vf *vf;
+ int status;
+
+ vf = ice_get_vf_by_id(pf, vf_id);
+ if (!vf)
+ return false;
+
+ if (test_bit(ICE_VF_STATE_DIS, vf->vf_states))
+ goto out_put_vf;
+
+ mbxdata.num_msg_proc = num_msg_proc;
+ mbxdata.num_pending_arq = num_msg_pending;
+ mbxdata.max_num_msgs_mbx = pf->hw.mailboxq.num_rq_entries;
+#define ICE_MBX_OVERFLOW_WATERMARK 64
+ mbxdata.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK;
+
+ /* check to see if we have a malicious VF */
+ status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, vf_id, &malvf);
+ if (status)
+ goto out_put_vf;
+
+ if (malvf) {
+ bool report_vf = false;
+
+ /* if the VF is malicious and we haven't let the user
+ * know about it, then let them know now
+ */
+ status = ice_mbx_report_malvf(&pf->hw, pf->vfs.malvfs,
+ ICE_MAX_SRIOV_VFS, vf_id,
+ &report_vf);
+ if (status)
+ dev_dbg(dev, "Error reporting malicious VF\n");
+
+ if (report_vf) {
+ struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
+
+ if (pf_vsi)
+ dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n",
+ &vf->dev_lan_addr.addr[0],
+ pf_vsi->netdev->dev_addr);
+ }
+ }
+
+out_put_vf:
+ ice_put_vf(vf);
+ return malvf;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h
new file mode 100644
index 000000000..955ab810a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SRIOV_H_
+#define _ICE_SRIOV_H_
+#include "ice_virtchnl_fdir.h"
+#include "ice_vf_lib.h"
+#include "ice_virtchnl.h"
+
+/* Static VF transaction/status register def */
+#define VF_DEVICE_STATUS 0xAA
+#define VF_TRANS_PENDING_M 0x20
+
+/* wait defines for polling PF_PCI_CIAD register status */
+#define ICE_PCI_CIAD_WAIT_COUNT 100
+#define ICE_PCI_CIAD_WAIT_DELAY_US 1
+
+/* VF resource constraints */
+#define ICE_MIN_QS_PER_VF 1
+#define ICE_NONQ_VECS_VF 1
+#define ICE_NUM_VF_MSIX_MED 17
+#define ICE_NUM_VF_MSIX_SMALL 5
+#define ICE_NUM_VF_MSIX_MULTIQ_MIN 3
+#define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1)
+#define ICE_MAX_VF_RESET_TRIES 40
+#define ICE_MAX_VF_RESET_SLEEP_MS 20
+
+#ifdef CONFIG_PCI_IOV
+void ice_process_vflr_event(struct ice_pf *pf);
+int ice_sriov_configure(struct pci_dev *pdev, int num_vfs);
+int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
+int
+ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi);
+
+void ice_free_vfs(struct ice_pf *pf);
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event);
+void ice_restore_all_vfs_msi_state(struct pci_dev *pdev);
+bool
+ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event,
+ u16 num_msg_proc, u16 num_msg_pending);
+
+int
+ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
+ __be16 vlan_proto);
+
+int
+ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+ int max_tx_rate);
+
+int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted);
+
+int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state);
+
+int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena);
+
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector);
+
+int
+ice_get_vf_stats(struct net_device *netdev, int vf_id,
+ struct ifla_vf_stats *vf_stats);
+void
+ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event);
+void ice_print_vfs_mdd_events(struct ice_pf *pf);
+void ice_print_vf_rx_mdd_event(struct ice_vf *vf);
+bool
+ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto);
+#else /* CONFIG_PCI_IOV */
+static inline void ice_process_vflr_event(struct ice_pf *pf) { }
+static inline void ice_free_vfs(struct ice_pf *pf) { }
+static inline
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) { }
+static inline
+void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) { }
+static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { }
+static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { }
+static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { }
+
+static inline bool
+ice_is_malicious_vf(struct ice_pf __always_unused *pf,
+ struct ice_rq_event_info __always_unused *event,
+ u16 __always_unused num_msg_proc,
+ u16 __always_unused num_msg_pending)
+{
+ return false;
+}
+
+static inline int
+ice_sriov_configure(struct pci_dev __always_unused *pdev,
+ int __always_unused num_vfs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_mac(struct net_device __always_unused *netdev,
+ int __always_unused vf_id, u8 __always_unused *mac)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_get_vf_cfg(struct net_device __always_unused *netdev,
+ int __always_unused vf_id,
+ struct ifla_vf_info __always_unused *ivi)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_trust(struct net_device __always_unused *netdev,
+ int __always_unused vf_id, bool __always_unused trusted)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_port_vlan(struct net_device __always_unused *netdev,
+ int __always_unused vf_id, u16 __always_unused vid,
+ u8 __always_unused qos, __be16 __always_unused v_proto)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_spoofchk(struct net_device __always_unused *netdev,
+ int __always_unused vf_id, bool __always_unused ena)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_link_state(struct net_device __always_unused *netdev,
+ int __always_unused vf_id, int __always_unused link_state)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_bw(struct net_device __always_unused *netdev,
+ int __always_unused vf_id, int __always_unused min_tx_rate,
+ int __always_unused max_tx_rate)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf,
+ struct ice_q_vector __always_unused *q_vector)
+{
+ return 0;
+}
+
+static inline int
+ice_get_vf_stats(struct net_device __always_unused *netdev,
+ int __always_unused vf_id,
+ struct ifla_vf_stats __always_unused *vf_stats)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_SRIOV_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
new file mode 100644
index 000000000..46b36851a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -0,0 +1,6709 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_lib.h"
+#include "ice_switch.h"
+
+#define ICE_ETH_DA_OFFSET 0
+#define ICE_ETH_ETHTYPE_OFFSET 12
+#define ICE_ETH_VLAN_TCI_OFFSET 14
+#define ICE_MAX_VLAN_ID 0xFFF
+#define ICE_IPV6_ETHER_ID 0x86DD
+
+/* Dummy ethernet header needed in the ice_aqc_sw_rules_elem
+ * struct to configure any switch filter rules.
+ * {DA (6 bytes), SA (6 bytes),
+ * Ether type (2 bytes for header without VLAN tag) OR
+ * VLAN tag (4 bytes for header with VLAN tag) }
+ *
+ * A word on the hardcoded values:
+ * byte 0 = 0x2: to identify it as locally administered DA MAC
+ * byte 6 = 0x2: to identify it as locally administered SA MAC
+ * byte 12 = 0x81 & byte 13 = 0x00:
+ * In case of a VLAN filter, these first two bytes define the ether type
+ * (0x8100) and the remaining two bytes are a placeholder for programming a
+ * given VLAN ID. In case of an Ether type filter, the header is treated as
+ * having no VLAN tag and bytes 12 and 13 are used to program the given Ether
+ * type instead.
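+ *
+ * For illustration (assuming the rule-programming code fills the VLAN ID
+ * placeholder in this layout): matching VLAN ID 100 (0x064) would program
+ * bytes 14 and 15 to 0x00, 0x64.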
+ */
+#define DUMMY_ETH_HDR_LEN 16
+static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0,
+ 0x2, 0, 0, 0, 0, 0,
+ 0x81, 0, 0, 0};
+
+enum {
+ ICE_PKT_OUTER_IPV6 = BIT(0),
+ ICE_PKT_TUN_GTPC = BIT(1),
+ ICE_PKT_TUN_GTPU = BIT(2),
+ ICE_PKT_TUN_NVGRE = BIT(3),
+ ICE_PKT_TUN_UDP = BIT(4),
+ ICE_PKT_INNER_IPV6 = BIT(5),
+ ICE_PKT_INNER_TCP = BIT(6),
+ ICE_PKT_INNER_UDP = BIT(7),
+ ICE_PKT_GTP_NOPAY = BIT(8),
+ ICE_PKT_KMALLOC = BIT(9),
+ ICE_PKT_PPPOE = BIT(10),
+ ICE_PKT_L2TPV3 = BIT(11),
+};
+
+struct ice_dummy_pkt_offsets {
+ enum ice_protocol_type type;
+ u16 offset; /* ICE_PROTOCOL_LAST indicates end of list */
+};
+
+struct ice_dummy_pkt_profile {
+ const struct ice_dummy_pkt_offsets *offsets;
+ const u8 *pkt;
+ u32 match;
+ u16 pkt_len;
+ u16 offsets_len;
+};
+
+#define ICE_DECLARE_PKT_OFFSETS(type) \
+ static const struct ice_dummy_pkt_offsets \
+ ice_dummy_##type##_packet_offsets[]
+
+#define ICE_DECLARE_PKT_TEMPLATE(type) \
+ static const u8 ice_dummy_##type##_packet[]
+
+#define ICE_PKT_PROFILE(type, m) { \
+ .match = (m), \
+ .pkt = ice_dummy_##type##_packet, \
+ .pkt_len = sizeof(ice_dummy_##type##_packet), \
+ .offsets = ice_dummy_##type##_packet_offsets, \
+ .offsets_len = sizeof(ice_dummy_##type##_packet_offsets), \
+}
+
+ICE_DECLARE_PKT_OFFSETS(vlan) = {
+ { ICE_VLAN_OFOS, 12 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(vlan) = {
+ 0x81, 0x00, 0x00, 0x00, /* ICE_VLAN_OFOS 12 */
+};
+
+ICE_DECLARE_PKT_OFFSETS(qinq) = {
+ { ICE_VLAN_EX, 12 },
+ { ICE_VLAN_IN, 16 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(qinq) = {
+ 0x91, 0x00, 0x00, 0x00, /* ICE_VLAN_EX 12 */
+ 0x81, 0x00, 0x00, 0x00, /* ICE_VLAN_IN 16 */
+};
+
+ICE_DECLARE_PKT_OFFSETS(gre_tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_NVGRE, 34 },
+ { ICE_MAC_IL, 42 },
+ { ICE_ETYPE_IL, 54 },
+ { ICE_IPV4_IL, 56 },
+ { ICE_TCP_IL, 76 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(gre_tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+ 0x45, 0x00, 0x00, 0x3E, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x2F, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_IL 54 */
+
+ 0x45, 0x00, 0x00, 0x14, /* ICE_IPV4_IL 56 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x06, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 76 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x02, 0x20, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+
+ICE_DECLARE_PKT_OFFSETS(gre_udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_NVGRE, 34 },
+ { ICE_MAC_IL, 42 },
+ { ICE_ETYPE_IL, 54 },
+ { ICE_IPV4_IL, 56 },
+ { ICE_UDP_ILOS, 76 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(gre_udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+ 0x45, 0x00, 0x00, 0x3E, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x2F, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_IL 54 */
+
+ 0x45, 0x00, 0x00, 0x14, /* ICE_IPV4_IL 56 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 76 */
+ 0x00, 0x08, 0x00, 0x00,
+};
+
+ICE_DECLARE_PKT_OFFSETS(udp_tun_tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_OF, 34 },
+ { ICE_VXLAN, 42 },
+ { ICE_GENEVE, 42 },
+ { ICE_VXLAN_GPE, 42 },
+ { ICE_MAC_IL, 50 },
+ { ICE_ETYPE_IL, 62 },
+ { ICE_IPV4_IL, 64 },
+ { ICE_TCP_IL, 84 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+ 0x45, 0x00, 0x00, 0x5a, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x01, 0x00, 0x00,
+ 0x40, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */
+ 0x00, 0x46, 0x00, 0x00,
+
+ 0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_IL 62 */
+
+ 0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_IL 64 */
+ 0x00, 0x01, 0x00, 0x00,
+ 0x40, 0x06, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 84 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x02, 0x20, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+
+ICE_DECLARE_PKT_OFFSETS(udp_tun_udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_OF, 34 },
+ { ICE_VXLAN, 42 },
+ { ICE_GENEVE, 42 },
+ { ICE_VXLAN_GPE, 42 },
+ { ICE_MAC_IL, 50 },
+ { ICE_ETYPE_IL, 62 },
+ { ICE_IPV4_IL, 64 },
+ { ICE_UDP_ILOS, 84 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+ 0x45, 0x00, 0x00, 0x4e, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */
+ 0x00, 0x3a, 0x00, 0x00,
+
+ 0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_IL 62 */
+
+ 0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_IL 64 */
+ 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 84 */
+ 0x00, 0x08, 0x00, 0x00,
+};
+
+ICE_DECLARE_PKT_OFFSETS(gre_ipv6_tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_NVGRE, 34 },
+ { ICE_MAC_IL, 42 },
+ { ICE_ETYPE_IL, 54 },
+ { ICE_IPV6_IL, 56 },
+ { ICE_TCP_IL, 96 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(gre_ipv6_tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+ 0x45, 0x00, 0x00, 0x66, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x2F, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x86, 0xdd, /* ICE_ETYPE_IL 54 */
+
+ 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_IL 56 */
+ 0x00, 0x08, 0x06, 0x40,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 96 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x02, 0x20, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+
+ICE_DECLARE_PKT_OFFSETS(gre_ipv6_udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_NVGRE, 34 },
+ { ICE_MAC_IL, 42 },
+ { ICE_ETYPE_IL, 54 },
+ { ICE_IPV6_IL, 56 },
+ { ICE_UDP_ILOS, 96 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(gre_ipv6_udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+ 0x45, 0x00, 0x00, 0x5a, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x2F, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x86, 0xdd, /* ICE_ETYPE_IL 54 */
+
+ 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_IL 56 */
+ 0x00, 0x08, 0x11, 0x40,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 96 */
+ 0x00, 0x08, 0x00, 0x00,
+};
+
+ICE_DECLARE_PKT_OFFSETS(udp_tun_ipv6_tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_OF, 34 },
+ { ICE_VXLAN, 42 },
+ { ICE_GENEVE, 42 },
+ { ICE_VXLAN_GPE, 42 },
+ { ICE_MAC_IL, 50 },
+ { ICE_ETYPE_IL, 62 },
+ { ICE_IPV6_IL, 64 },
+ { ICE_TCP_IL, 104 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_ipv6_tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+ 0x45, 0x00, 0x00, 0x6e, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x01, 0x00, 0x00,
+ 0x40, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */
+ 0x00, 0x5a, 0x00, 0x00,
+
+ 0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x86, 0xdd, /* ICE_ETYPE_IL 62 */
+
+ 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_IL 64 */
+ 0x00, 0x08, 0x06, 0x40,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 104 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x02, 0x20, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+
+ICE_DECLARE_PKT_OFFSETS(udp_tun_ipv6_udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_OF, 34 },
+ { ICE_VXLAN, 42 },
+ { ICE_GENEVE, 42 },
+ { ICE_VXLAN_GPE, 42 },
+ { ICE_MAC_IL, 50 },
+ { ICE_ETYPE_IL, 62 },
+ { ICE_IPV6_IL, 64 },
+ { ICE_UDP_ILOS, 104 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_ipv6_udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+ 0x45, 0x00, 0x00, 0x62, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */
+ 0x00, 0x4e, 0x00, 0x00,
+
+ 0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x86, 0xdd, /* ICE_ETYPE_IL 62 */
+
+ 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_IL 64 */
+ 0x00, 0x08, 0x11, 0x40,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 104 */
+ 0x00, 0x08, 0x00, 0x00,
+};
+
+/* offset info for MAC + IPv4 + UDP dummy packet */
+ICE_DECLARE_PKT_OFFSETS(udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_ILOS, 34 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+/* Dummy packet for MAC + IPv4 + UDP */
+ICE_DECLARE_PKT_TEMPLATE(udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+ 0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 34 */
+ 0x00, 0x08, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+/* offset info for MAC + IPv4 + TCP dummy packet */
+ICE_DECLARE_PKT_OFFSETS(tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_TCP_IL, 34 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+/* Dummy packet for MAC + IPv4 + TCP */
+ICE_DECLARE_PKT_TEMPLATE(tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+ 0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x06, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 34 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(tcp_ipv6) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV6_OFOS, 14 },
+ { ICE_TCP_IL, 54 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(tcp_ipv6) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x86, 0xDD, /* ICE_ETYPE_OL 12 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 14 */
+ 0x00, 0x14, 0x06, 0x00, /* Next header is TCP */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 54 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+/* IPv6 + UDP */
+ICE_DECLARE_PKT_OFFSETS(udp_ipv6) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV6_OFOS, 14 },
+ { ICE_UDP_ILOS, 54 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+/* IPv6 + UDP dummy packet */
+ICE_DECLARE_PKT_TEMPLATE(udp_ipv6) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x86, 0xDD, /* ICE_ETYPE_OL 12 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 14 */
+ 0x00, 0x10, 0x11, 0x00, /* Next header UDP */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 54 */
+ 0x00, 0x10, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* needed for ESP packets */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+/* Outer IPv4 + Outer UDP + GTP + Inner IPv4 + Inner TCP */
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4_tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_OF, 34 },
+ { ICE_GTP, 42 },
+ { ICE_IPV4_IL, 62 },
+ { ICE_TCP_IL, 82 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4_tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x08, 0x00,
+
+ 0x45, 0x00, 0x00, 0x58, /* IP 14 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x08, 0x68, /* UDP 34 */
+ 0x00, 0x44, 0x00, 0x00,
+
+ 0x34, 0xff, 0x00, 0x34, /* ICE_GTP Header 42 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x85,
+
+ 0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 54 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x45, 0x00, 0x00, 0x28, /* IP 62 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x06, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* TCP 82 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+/* Outer IPv4 + Outer UDP + GTP + Inner IPv4 + Inner UDP */
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4_udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_OF, 34 },
+ { ICE_GTP, 42 },
+ { ICE_IPV4_IL, 62 },
+ { ICE_UDP_ILOS, 82 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4_udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x08, 0x00,
+
+ 0x45, 0x00, 0x00, 0x4c, /* IP 14 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x08, 0x68, /* UDP 34 */
+ 0x00, 0x38, 0x00, 0x00,
+
+ 0x34, 0xff, 0x00, 0x28, /* ICE_GTP Header 42 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x85,
+
+ 0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 54 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x45, 0x00, 0x00, 0x1c, /* IP 62 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* UDP 82 */
+ 0x00, 0x08, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+/* Outer IPv6 + Outer UDP + GTP + Inner IPv4 + Inner TCP */
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv6_tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_OF, 34 },
+ { ICE_GTP, 42 },
+ { ICE_IPV6_IL, 62 },
+ { ICE_TCP_IL, 102 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv6_tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x08, 0x00,
+
+ 0x45, 0x00, 0x00, 0x6c, /* IP 14 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x08, 0x68, /* UDP 34 */
+ 0x00, 0x58, 0x00, 0x00,
+
+ 0x34, 0xff, 0x00, 0x48, /* ICE_GTP Header 42 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x85,
+
+ 0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 54 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x60, 0x00, 0x00, 0x00, /* IPv6 62 */
+ 0x00, 0x14, 0x06, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* TCP 102 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv6_udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_OF, 34 },
+ { ICE_GTP, 42 },
+ { ICE_IPV6_IL, 62 },
+ { ICE_UDP_ILOS, 102 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv6_udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x08, 0x00,
+
+ 0x45, 0x00, 0x00, 0x60, /* IP 14 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x08, 0x68, /* UDP 34 */
+ 0x00, 0x4c, 0x00, 0x00,
+
+ 0x34, 0xff, 0x00, 0x3c, /* ICE_GTP Header 42 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x85,
+
+ 0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 54 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x60, 0x00, 0x00, 0x00, /* IPv6 62 */
+ 0x00, 0x08, 0x11, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* UDP 102 */
+ 0x00, 0x08, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv4_tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV6_OFOS, 14 },
+ { ICE_UDP_OF, 54 },
+ { ICE_GTP, 62 },
+ { ICE_IPV4_IL, 82 },
+ { ICE_TCP_IL, 102 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv4_tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x86, 0xdd,
+
+ 0x60, 0x00, 0x00, 0x00, /* IPv6 14 */
+ 0x00, 0x44, 0x11, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x08, 0x68, /* UDP 54 */
+ 0x00, 0x44, 0x00, 0x00,
+
+ 0x34, 0xff, 0x00, 0x34, /* ICE_GTP Header 62 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x85,
+
+ 0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 74 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x45, 0x00, 0x00, 0x28, /* IP 82 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x06, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* TCP 102 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv4_udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV6_OFOS, 14 },
+ { ICE_UDP_OF, 54 },
+ { ICE_GTP, 62 },
+ { ICE_IPV4_IL, 82 },
+ { ICE_UDP_ILOS, 102 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv4_udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x86, 0xdd,
+
+ 0x60, 0x00, 0x00, 0x00, /* IPv6 14 */
+ 0x00, 0x38, 0x11, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x08, 0x68, /* UDP 54 */
+ 0x00, 0x38, 0x00, 0x00,
+
+ 0x34, 0xff, 0x00, 0x28, /* ICE_GTP Header 62 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x85,
+
+ 0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 74 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x45, 0x00, 0x00, 0x1c, /* IP 82 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* UDP 102 */
+ 0x00, 0x08, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv6_tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV6_OFOS, 14 },
+ { ICE_UDP_OF, 54 },
+ { ICE_GTP, 62 },
+ { ICE_IPV6_IL, 82 },
+ { ICE_TCP_IL, 122 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv6_tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x86, 0xdd,
+
+ 0x60, 0x00, 0x00, 0x00, /* IPv6 14 */
+ 0x00, 0x58, 0x11, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x08, 0x68, /* UDP 54 */
+ 0x00, 0x58, 0x00, 0x00,
+
+ 0x34, 0xff, 0x00, 0x48, /* ICE_GTP Header 62 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x85,
+
+ 0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 74 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x60, 0x00, 0x00, 0x00, /* IPv6 82 */
+ 0x00, 0x14, 0x06, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* TCP 122 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv6_udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV6_OFOS, 14 },
+ { ICE_UDP_OF, 54 },
+ { ICE_GTP, 62 },
+ { ICE_IPV6_IL, 82 },
+ { ICE_UDP_ILOS, 122 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv6_udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x86, 0xdd,
+
+ 0x60, 0x00, 0x00, 0x00, /* IPv6 14 */
+ 0x00, 0x4c, 0x11, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x08, 0x68, /* UDP 54 */
+ 0x00, 0x4c, 0x00, 0x00,
+
+ 0x34, 0xff, 0x00, 0x3c, /* ICE_GTP Header 62 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x85,
+
+ 0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 74 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x60, 0x00, 0x00, 0x00, /* IPv6 82 */
+ 0x00, 0x08, 0x11, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* UDP 122 */
+ 0x00, 0x08, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_OF, 34 },
+ { ICE_GTP_NO_PAY, 42 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x08, 0x00,
+
+ 0x45, 0x00, 0x00, 0x44, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x00, 0x40, 0x00,
+ 0x40, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x68, 0x08, 0x68, /* ICE_UDP_OF 34 */
+ 0x00, 0x00, 0x00, 0x00,
+
+	0x34, 0xff, 0x00, 0x28, /* ICE_GTP_NO_PAY 42 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x85,
+
+ 0x02, 0x00, 0x00, 0x00, /* PDU Session extension header */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x45, 0x00, 0x00, 0x14, /* ICE_IPV4_IL 62 */
+ 0x00, 0x00, 0x40, 0x00,
+ 0x40, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV6_OFOS, 14 },
+ { ICE_UDP_OF, 54 },
+ { ICE_GTP_NO_PAY, 62 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x86, 0xdd,
+
+ 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 14 */
+	0x00, 0x6c, 0x11, 0x00, /* Next header UDP */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x68, 0x08, 0x68, /* ICE_UDP_OF 54 */
+ 0x00, 0x00, 0x00, 0x00,
+
+	0x30, 0x00, 0x00, 0x28, /* ICE_GTP_NO_PAY 62 */
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00,
+};
+
+ICE_DECLARE_PKT_OFFSETS(pppoe_ipv4_tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_PPPOE, 14 },
+ { ICE_IPV4_OFOS, 22 },
+ { ICE_TCP_IL, 42 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv4_tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x88, 0x64, /* ICE_ETYPE_OL 12 */
+
+ 0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */
+ 0x00, 0x16,
+
+ 0x00, 0x21, /* PPP Link Layer 20 */
+
+ 0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_OFOS 22 */
+ 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x06, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 42 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 bytes alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(pppoe_ipv4_udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_PPPOE, 14 },
+ { ICE_IPV4_OFOS, 22 },
+ { ICE_UDP_ILOS, 42 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv4_udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x88, 0x64, /* ICE_ETYPE_OL 12 */
+
+ 0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */
+ 0x00, 0x16,
+
+ 0x00, 0x21, /* PPP Link Layer 20 */
+
+ 0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_OFOS 22 */
+ 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x11, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 42 */
+ 0x00, 0x08, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 bytes alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(pppoe_ipv6_tcp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_PPPOE, 14 },
+ { ICE_IPV6_OFOS, 22 },
+ { ICE_TCP_IL, 62 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv6_tcp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x88, 0x64, /* ICE_ETYPE_OL 12 */
+
+ 0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */
+ 0x00, 0x2a,
+
+ 0x00, 0x57, /* PPP Link Layer 20 */
+
+ 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 22 */
+ 0x00, 0x14, 0x06, 0x00, /* Next header is TCP */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 62 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 bytes alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(pppoe_ipv6_udp) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_PPPOE, 14 },
+ { ICE_IPV6_OFOS, 22 },
+ { ICE_UDP_ILOS, 62 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv6_udp) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x88, 0x64, /* ICE_ETYPE_OL 12 */
+
+ 0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */
+ 0x00, 0x2a,
+
+ 0x00, 0x57, /* PPP Link Layer 20 */
+
+ 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 22 */
+	0x00, 0x08, 0x11, 0x00, /* Next header UDP */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 62 */
+ 0x00, 0x08, 0x00, 0x00,
+
+ 0x00, 0x00, /* 2 bytes for 4 bytes alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv4_l2tpv3) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_L2TPV3, 34 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_l2tpv3) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x08, 0x00, /* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x20, /* ICE_IPV4_OFOS 14 */
+ 0x00, 0x00, 0x40, 0x00,
+ 0x40, 0x73, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_L2TPV3 34 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, /* 2 bytes for 4 bytes alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_l2tpv3) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_ETYPE_OL, 12 },
+ { ICE_IPV6_OFOS, 14 },
+ { ICE_L2TPV3, 54 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_l2tpv3) = {
+ 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x86, 0xDD, /* ICE_ETYPE_OL 12 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 14 */
+ 0x00, 0x0c, 0x73, 0x40,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, /* ICE_L2TPV3 54 */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, /* 2 bytes for 4 bytes alignment */
+};
+
+static const struct ice_dummy_pkt_profile ice_dummy_pkt_profiles[] = {
+ ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPU | ICE_PKT_OUTER_IPV6 |
+ ICE_PKT_GTP_NOPAY),
+ ICE_PKT_PROFILE(ipv6_gtpu_ipv6_udp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_OUTER_IPV6 |
+ ICE_PKT_INNER_IPV6 |
+ ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(ipv6_gtpu_ipv6_tcp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_OUTER_IPV6 |
+ ICE_PKT_INNER_IPV6),
+ ICE_PKT_PROFILE(ipv6_gtpu_ipv4_udp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_OUTER_IPV6 |
+ ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(ipv6_gtpu_ipv4_tcp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_OUTER_IPV6),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv4, ICE_PKT_TUN_GTPU | ICE_PKT_GTP_NOPAY),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv6_udp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_INNER_IPV6 |
+ ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv6_tcp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_INNER_IPV6),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv4_udp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv4_tcp, ICE_PKT_TUN_GTPU),
+ ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPC | ICE_PKT_OUTER_IPV6),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv4, ICE_PKT_TUN_GTPC),
+ ICE_PKT_PROFILE(pppoe_ipv6_udp, ICE_PKT_PPPOE | ICE_PKT_OUTER_IPV6 |
+ ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(pppoe_ipv6_tcp, ICE_PKT_PPPOE | ICE_PKT_OUTER_IPV6),
+ ICE_PKT_PROFILE(pppoe_ipv4_udp, ICE_PKT_PPPOE | ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(pppoe_ipv4_tcp, ICE_PKT_PPPOE),
+ ICE_PKT_PROFILE(gre_ipv6_tcp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_IPV6 |
+ ICE_PKT_INNER_TCP),
+ ICE_PKT_PROFILE(gre_tcp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_TCP),
+ ICE_PKT_PROFILE(gre_ipv6_udp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_IPV6),
+ ICE_PKT_PROFILE(gre_udp, ICE_PKT_TUN_NVGRE),
+ ICE_PKT_PROFILE(udp_tun_ipv6_tcp, ICE_PKT_TUN_UDP |
+ ICE_PKT_INNER_IPV6 |
+ ICE_PKT_INNER_TCP),
+ ICE_PKT_PROFILE(ipv6_l2tpv3, ICE_PKT_L2TPV3 | ICE_PKT_OUTER_IPV6),
+ ICE_PKT_PROFILE(ipv4_l2tpv3, ICE_PKT_L2TPV3),
+ ICE_PKT_PROFILE(udp_tun_tcp, ICE_PKT_TUN_UDP | ICE_PKT_INNER_TCP),
+ ICE_PKT_PROFILE(udp_tun_ipv6_udp, ICE_PKT_TUN_UDP |
+ ICE_PKT_INNER_IPV6),
+ ICE_PKT_PROFILE(udp_tun_udp, ICE_PKT_TUN_UDP),
+ ICE_PKT_PROFILE(udp_ipv6, ICE_PKT_OUTER_IPV6 | ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(udp, ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(tcp_ipv6, ICE_PKT_OUTER_IPV6),
+ ICE_PKT_PROFILE(tcp, 0),
+};
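
The table above is ordered from most to least specific, so a lookup can scan
linearly and stop at the first profile whose required match bits are all
present in the caller's flags; the final entry (tcp, match == 0) acts as the
fallback. The real lookup lives further down this file; a minimal sketch of
the idea (illustrative helper name):

	static const struct ice_dummy_pkt_profile *
	ice_pick_dummy_pkt(u32 match)
	{
		const struct ice_dummy_pkt_profile *ret =
			ice_dummy_pkt_profiles;

		/* advance while the profile needs a bit the caller lacks */
		while (ret->match && (match & ret->match) != ret->match)
			ret++;

		return ret;
	}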
+
+#define ICE_SW_RULE_RX_TX_HDR_SIZE(s, l) struct_size((s), hdr_data, (l))
+#define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s) \
+ ICE_SW_RULE_RX_TX_HDR_SIZE((s), DUMMY_ETH_HDR_LEN)
+#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s) \
+ ICE_SW_RULE_RX_TX_HDR_SIZE((s), 0)
+#define ICE_SW_RULE_LG_ACT_SIZE(s, n) struct_size((s), act, (n))
+#define ICE_SW_RULE_VSI_LIST_SIZE(s, n) struct_size((s), vsi, (n))
+
+/* this is a recipe to profile association bitmap */
+static DECLARE_BITMAP(recipe_to_profile[ICE_MAX_NUM_RECIPES],
+ ICE_MAX_NUM_PROFILES);
+
+/* this is a profile to recipe association bitmap */
+static DECLARE_BITMAP(profile_to_recipe[ICE_MAX_NUM_PROFILES],
+ ICE_MAX_NUM_RECIPES);
+
+/**
+ * ice_init_def_sw_recp - initialize the recipe bookkeeping tables
+ * @hw: pointer to the HW struct
+ *
+ * Allocate memory for the entire recipe table and initialize the structures/
+ * entries corresponding to basic recipes.
+ */
+int ice_init_def_sw_recp(struct ice_hw *hw)
+{
+ struct ice_sw_recipe *recps;
+ u8 i;
+
+ recps = devm_kcalloc(ice_hw_to_dev(hw), ICE_MAX_NUM_RECIPES,
+ sizeof(*recps), GFP_KERNEL);
+ if (!recps)
+ return -ENOMEM;
+
+ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
+ recps[i].root_rid = i;
+ INIT_LIST_HEAD(&recps[i].filt_rules);
+ INIT_LIST_HEAD(&recps[i].filt_replay_rules);
+ INIT_LIST_HEAD(&recps[i].rg_list);
+ mutex_init(&recps[i].filt_rule_lock);
+ }
+
+ hw->switch_info->recp_list = recps;
+
+ return 0;
+}
+
+/**
+ * ice_aq_get_sw_cfg - get switch configuration
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the result buffer
+ * @buf_size: length of the buffer available for response
+ * @req_desc: pointer to requested descriptor
+ * @num_elems: pointer to number of elements
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get switch configuration (0x0200) to be placed in buf.
+ * This admin command returns information such as the initial VSI/port number
+ * and the switch ID it belongs to.
+ *
+ * NOTE: *req_desc is both an input and an output parameter.
+ * The caller first calls this function with *req_desc set to 0. If the
+ * response from f/w has *req_desc set to 0, all the switch configuration
+ * information has been returned; if it is non-zero (meaning not all of the
+ * information was returned), the caller should call this function again with
+ * *req_desc set to the previous value returned by f/w to get the next block
+ * of switch configuration information.
+ *
+ * *num_elems is an output-only parameter reflecting the number of elements
+ * in the response buffer. The caller should use *num_elems when parsing the
+ * response buffer.
+ */
+static int
+ice_aq_get_sw_cfg(struct ice_hw *hw, struct ice_aqc_get_sw_cfg_resp_elem *buf,
+ u16 buf_size, u16 *req_desc, u16 *num_elems,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_sw_cfg *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sw_cfg);
+ cmd = &desc.params.get_sw_conf;
+ cmd->element = cpu_to_le16(*req_desc);
+
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status) {
+ *req_desc = le16_to_cpu(cmd->element);
+ *num_elems = le16_to_cpu(cmd->num_elems);
+ }
+
+ return status;
+}
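
The *req_desc contract implies a simple fetch loop on the caller's side. A
sketch, assuming the ICE_SW_CFG_MAX_BUF_LEN buffer-size constant from
ice_switch.h; parsing and error handling are elided:

	struct ice_aqc_get_sw_cfg_resp_elem *rbuf;
	u16 req_desc = 0;
	u16 num_elems;
	int status;

	rbuf = kzalloc(ICE_SW_CFG_MAX_BUF_LEN, GFP_KERNEL);
	if (!rbuf)
		return -ENOMEM;

	/* keep fetching until f/w hands back req_desc == 0, signaling the
	 * last block of switch configuration has been returned
	 */
	do {
		status = ice_aq_get_sw_cfg(hw, rbuf, ICE_SW_CFG_MAX_BUF_LEN,
					   &req_desc, &num_elems, NULL);
		if (status)
			break;
		/* parse the first num_elems entries of rbuf here */
	} while (req_desc);

	kfree(rbuf);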
+
+/**
+ * ice_aq_add_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware (0x0210)
+ */
+static int
+ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_update_free_vsi_resp *res;
+ struct ice_aqc_add_get_update_free_vsi *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ cmd = &desc.params.vsi_cmd;
+ res = &desc.params.add_update_free_vsi_res;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_vsi);
+
+ if (!vsi_ctx->alloc_from_pool)
+ cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num |
+ ICE_AQ_VSI_IS_VALID);
+ cmd->vf_id = vsi_ctx->vf_num;
+
+ cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info,
+ sizeof(vsi_ctx->info), cd);
+
+ if (!status) {
+ vsi_ctx->vsi_num = le16_to_cpu(res->vsi_num) & ICE_AQ_VSI_NUM_M;
+ vsi_ctx->vsis_allocd = le16_to_cpu(res->vsi_used);
+ vsi_ctx->vsis_unallocated = le16_to_cpu(res->vsi_free);
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_free_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
+ * @cd: pointer to command details structure or NULL
+ *
+ * Free VSI context info from hardware (0x0213)
+ */
+static int
+ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+ bool keep_vsi_alloc, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_update_free_vsi_resp *resp;
+ struct ice_aqc_add_get_update_free_vsi *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ cmd = &desc.params.vsi_cmd;
+ resp = &desc.params.add_update_free_vsi_res;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_free_vsi);
+
+ cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+ if (keep_vsi_alloc)
+ cmd->cmd_flags = cpu_to_le16(ICE_AQ_VSI_KEEP_ALLOC);
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+ if (!status) {
+ vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+ vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_update_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update VSI context in the hardware (0x0211)
+ */
+static int
+ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_update_free_vsi_resp *resp;
+ struct ice_aqc_add_get_update_free_vsi *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ cmd = &desc.params.vsi_cmd;
+ resp = &desc.params.add_update_free_vsi_res;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_vsi);
+
+ cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info,
+ sizeof(vsi_ctx->info), cd);
+
+ if (!status) {
+ vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+ vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+ }
+
+ return status;
+}
+
+/**
+ * ice_is_vsi_valid - check whether the VSI is valid or not
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * check whether the VSI is valid or not
+ */
+bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle)
+{
+ return vsi_handle < ICE_MAX_VSI && hw->vsi_ctx[vsi_handle];
+}
+
+/**
+ * ice_get_hw_vsi_num - return the HW VSI number
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * return the HW VSI number
+ * Caution: call this function only if VSI is valid (ice_is_vsi_valid)
+ */
+u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle)
+{
+ return hw->vsi_ctx[vsi_handle]->vsi_num;
+}
+
+/**
+ * ice_get_vsi_ctx - return the VSI context entry for a given VSI handle
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * return the VSI context entry for a given VSI handle
+ */
+struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+ return (vsi_handle >= ICE_MAX_VSI) ? NULL : hw->vsi_ctx[vsi_handle];
+}
+
+/**
+ * ice_save_vsi_ctx - save the VSI context for a given VSI handle
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @vsi: VSI context pointer
+ *
+ * save the VSI context entry for a given VSI handle
+ */
+static void
+ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi)
+{
+ hw->vsi_ctx[vsi_handle] = vsi;
+}
+
+/**
+ * ice_clear_vsi_q_ctx - clear VSI queue contexts for all TCs
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ */
+static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_vsi_ctx *vsi;
+ u8 i;
+
+ vsi = ice_get_vsi_ctx(hw, vsi_handle);
+ if (!vsi)
+ return;
+ ice_for_each_traffic_class(i) {
+ if (vsi->lan_q_ctx[i]) {
+ devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]);
+ vsi->lan_q_ctx[i] = NULL;
+ }
+ if (vsi->rdma_q_ctx[i]) {
+ devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]);
+ vsi->rdma_q_ctx[i] = NULL;
+ }
+ }
+}
+
+/**
+ * ice_clear_vsi_ctx - clear the VSI context entry
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * clear the VSI context entry
+ */
+static void ice_clear_vsi_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_vsi_ctx *vsi;
+
+ vsi = ice_get_vsi_ctx(hw, vsi_handle);
+ if (vsi) {
+ ice_clear_vsi_q_ctx(hw, vsi_handle);
+ devm_kfree(ice_hw_to_dev(hw), vsi);
+ hw->vsi_ctx[vsi_handle] = NULL;
+ }
+}
+
+/**
+ * ice_clear_all_vsi_ctx - clear all the VSI context entries
+ * @hw: pointer to the HW struct
+ */
+void ice_clear_all_vsi_ctx(struct ice_hw *hw)
+{
+ u16 i;
+
+ for (i = 0; i < ICE_MAX_VSI; i++)
+ ice_clear_vsi_ctx(hw, i);
+}
+
+/**
+ * ice_add_vsi - add VSI context to the hardware and VSI handle list
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle provided by drivers
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware and also add it to the VSI handle list.
+ * If this function is called after a reset for an existing VSI, the
+ * corresponding VSI handle list entry is updated with the new HW VSI number.
+ */
+int
+ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+ struct ice_sq_cd *cd)
+{
+ struct ice_vsi_ctx *tmp_vsi_ctx;
+ int status;
+
+ if (vsi_handle >= ICE_MAX_VSI)
+ return -EINVAL;
+ status = ice_aq_add_vsi(hw, vsi_ctx, cd);
+ if (status)
+ return status;
+ tmp_vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+ if (!tmp_vsi_ctx) {
+ /* Create a new VSI context */
+ tmp_vsi_ctx = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*tmp_vsi_ctx), GFP_KERNEL);
+ if (!tmp_vsi_ctx) {
+ ice_aq_free_vsi(hw, vsi_ctx, false, cd);
+ return -ENOMEM;
+ }
+ *tmp_vsi_ctx = *vsi_ctx;
+ ice_save_vsi_ctx(hw, vsi_handle, tmp_vsi_ctx);
+ } else {
+ /* update with new HW VSI num */
+ tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_free_vsi - free VSI context from hardware and VSI handle list
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle
+ * @vsi_ctx: pointer to a VSI context struct
+ * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
+ * @cd: pointer to command details structure or NULL
+ *
+ * Free VSI context info from hardware as well as from VSI handle list
+ */
+int
+ice_free_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+ bool keep_vsi_alloc, struct ice_sq_cd *cd)
+{
+ int status;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+ vsi_ctx->vsi_num = ice_get_hw_vsi_num(hw, vsi_handle);
+ status = ice_aq_free_vsi(hw, vsi_ctx, keep_vsi_alloc, cd);
+ if (!status)
+ ice_clear_vsi_ctx(hw, vsi_handle);
+ return status;
+}
+
+/**
+ * ice_update_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update VSI context in the hardware
+ */
+int
+ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+ struct ice_sq_cd *cd)
+{
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+ vsi_ctx->vsi_num = ice_get_hw_vsi_num(hw, vsi_handle);
+ return ice_aq_update_vsi(hw, vsi_ctx, cd);
+}
+
+/**
+ * ice_cfg_rdma_fltr - enable/disable RDMA filtering on VSI
+ * @hw: pointer to HW struct
+ * @vsi_handle: VSI SW index
+ * @enable: boolean for enable/disable
+ */
+int
+ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable)
+{
+ struct ice_vsi_ctx *ctx, *cached_ctx;
+ int status;
+
+ cached_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+ if (!cached_ctx)
+ return -ENOENT;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->info.q_opt_rss = cached_ctx->info.q_opt_rss;
+ ctx->info.q_opt_tc = cached_ctx->info.q_opt_tc;
+ ctx->info.q_opt_flags = cached_ctx->info.q_opt_flags;
+
+ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+
+ if (enable)
+ ctx->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+ else
+ ctx->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+
+ status = ice_update_vsi(hw, vsi_handle, ctx, NULL);
+ if (!status) {
+ cached_ctx->info.q_opt_flags = ctx->info.q_opt_flags;
+ cached_ctx->info.valid_sections |= ctx->info.valid_sections;
+ }
+
+ kfree(ctx);
+ return status;
+}
+
+/**
+ * ice_aq_alloc_free_vsi_list
+ * @hw: pointer to the HW struct
+ * @vsi_list_id: VSI list ID returned or used for lookup
+ * @lkup_type: switch rule filter lookup type
+ * @opc: switch rules population command type - pass in the command opcode
+ *
+ * allocates or frees a VSI list resource
+ */
+static int
+ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id,
+ enum ice_sw_lkup_type lkup_type,
+ enum ice_adminq_opc opc)
+{
+ struct ice_aqc_alloc_free_res_elem *sw_buf;
+ struct ice_aqc_res_elem *vsi_ele;
+ u16 buf_len;
+ int status;
+
+ buf_len = struct_size(sw_buf, elem, 1);
+ sw_buf = devm_kzalloc(ice_hw_to_dev(hw), buf_len, GFP_KERNEL);
+ if (!sw_buf)
+ return -ENOMEM;
+ sw_buf->num_elems = cpu_to_le16(1);
+
+ if (lkup_type == ICE_SW_LKUP_MAC ||
+ lkup_type == ICE_SW_LKUP_MAC_VLAN ||
+ lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+ lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+ lkup_type == ICE_SW_LKUP_PROMISC ||
+ lkup_type == ICE_SW_LKUP_PROMISC_VLAN ||
+ lkup_type == ICE_SW_LKUP_DFLT) {
+ sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_REP);
+ } else if (lkup_type == ICE_SW_LKUP_VLAN) {
+ sw_buf->res_type =
+ cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE);
+ } else {
+ status = -EINVAL;
+ goto ice_aq_alloc_free_vsi_list_exit;
+ }
+
+ if (opc == ice_aqc_opc_free_res)
+ sw_buf->elem[0].e.sw_resp = cpu_to_le16(*vsi_list_id);
+
+ status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, opc, NULL);
+ if (status)
+ goto ice_aq_alloc_free_vsi_list_exit;
+
+ if (opc == ice_aqc_opc_alloc_res) {
+ vsi_ele = &sw_buf->elem[0];
+ *vsi_list_id = le16_to_cpu(vsi_ele->e.sw_resp);
+ }
+
+ice_aq_alloc_free_vsi_list_exit:
+ devm_kfree(ice_hw_to_dev(hw), sw_buf);
+ return status;
+}
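
A hedged usage sketch of the helper above: allocate a pruning list for a VLAN
lookup, use it, then release it with the free opcode; error handling on the
free path is elided:

	u16 vsi_list_id;
	int err;

	err = ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, ICE_SW_LKUP_VLAN,
					 ice_aqc_opc_alloc_res);
	if (err)
		return err;

	/* ... reference vsi_list_id from switch rules ... */

	ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, ICE_SW_LKUP_VLAN,
				   ice_aqc_opc_free_res);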
+
+/**
+ * ice_aq_sw_rules - add/update/remove switch rules
+ * @hw: pointer to the HW struct
+ * @rule_list: pointer to switch rule population list
+ * @rule_list_sz: total size of the rule list in bytes
+ * @num_rules: number of switch rules in the rule_list
+ * @opc: switch rules population command type - pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add(0x02a0)/Update(0x02a1)/Remove(0x02a2) switch rules commands to firmware
+ */
+int
+ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz,
+ u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+ struct ice_aq_desc desc;
+ int status;
+
+ if (opc != ice_aqc_opc_add_sw_rules &&
+ opc != ice_aqc_opc_update_sw_rules &&
+ opc != ice_aqc_opc_remove_sw_rules)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ desc.params.sw_rules.num_rules_fltr_entry_index =
+ cpu_to_le16(num_rules);
+ status = ice_aq_send_cmd(hw, &desc, rule_list, rule_list_sz, cd);
+ if (opc != ice_aqc_opc_add_sw_rules &&
+ hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
+ status = -ENOENT;
+
+ return status;
+}
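
A sketch of driving this wrapper to add a single lookup rule, sized with the
ICE_SW_RULE_RX_TX_ETH_HDR_SIZE() macro defined earlier; the
ice_sw_rule_lkup_rx_tx struct is assumed from the adminq command definitions,
and the rule fields themselves are elided:

	struct ice_sw_rule_lkup_rx_tx *s_rule;
	u16 sz;
	int err;

	sz = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule);
	s_rule = kzalloc(sz, GFP_KERNEL);
	if (!s_rule)
		return -ENOMEM;

	/* fill in the recipe ID, action and dummy header data here */

	err = ice_aq_sw_rules(hw, s_rule, sz, 1, ice_aqc_opc_add_sw_rules,
			      NULL);
	kfree(s_rule);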
+
+/**
+ * ice_aq_add_recipe - add switch recipe
+ * @hw: pointer to the HW struct
+ * @s_recipe_list: pointer to the list of recipes to add
+ * @num_recipes: number of switch recipes in the list
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add(0x0290)
+ */
+static int
+ice_aq_add_recipe(struct ice_hw *hw,
+ struct ice_aqc_recipe_data_elem *s_recipe_list,
+ u16 num_recipes, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_get_recipe *cmd;
+ struct ice_aq_desc desc;
+ u16 buf_size;
+
+ cmd = &desc.params.add_get_recipe;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_recipe);
+
+ cmd->num_sub_recipes = cpu_to_le16(num_recipes);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ buf_size = num_recipes * sizeof(*s_recipe_list);
+
+ return ice_aq_send_cmd(hw, &desc, s_recipe_list, buf_size, cd);
+}
+
+/**
+ * ice_aq_get_recipe - get switch recipe
+ * @hw: pointer to the HW struct
+ * @s_recipe_list: pointer to the buffer that receives the recipe(s)
+ * @num_recipes: pointer to the number of recipes (input and output)
+ * @recipe_root: root recipe number of recipe(s) to retrieve
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get(0x0292)
+ *
+ * On input, *num_recipes should equal the number of entries in s_recipe_list.
+ * On output, *num_recipes will equal the number of entries returned in
+ * s_recipe_list.
+ *
+ * The caller must supply enough space in s_recipe_list to hold all possible
+ * recipes and *num_recipes must equal ICE_MAX_NUM_RECIPES.
+ */
+static int
+ice_aq_get_recipe(struct ice_hw *hw,
+ struct ice_aqc_recipe_data_elem *s_recipe_list,
+ u16 *num_recipes, u16 recipe_root, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_get_recipe *cmd;
+ struct ice_aq_desc desc;
+ u16 buf_size;
+ int status;
+
+ if (*num_recipes != ICE_MAX_NUM_RECIPES)
+ return -EINVAL;
+
+ cmd = &desc.params.add_get_recipe;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_recipe);
+
+ cmd->return_index = cpu_to_le16(recipe_root);
+ cmd->num_sub_recipes = 0;
+
+ buf_size = *num_recipes * sizeof(*s_recipe_list);
+
+ status = ice_aq_send_cmd(hw, &desc, s_recipe_list, buf_size, cd);
+ *num_recipes = le16_to_cpu(cmd->num_sub_recipes);
+
+ return status;
+}
+
+/**
+ * ice_update_recipe_lkup_idx - update a default recipe based on the lkup_idx
+ * @hw: pointer to the HW struct
+ * @params: parameters used to update the default recipe
+ *
+ * This function only supports updating default recipes and it only supports
+ * updating a single recipe based on the lkup_idx at a time.
+ *
+ * This is done as a read-modify-write operation. First, get the current recipe
+ * contents based on the recipe's ID. Then modify the field vector index and
+ * mask if it's valid at the lkup_idx. Finally, use the add recipe AQ to update
+ * the pre-existing recipe with the modifications.
+ */
+int
+ice_update_recipe_lkup_idx(struct ice_hw *hw,
+ struct ice_update_recipe_lkup_idx_params *params)
+{
+ struct ice_aqc_recipe_data_elem *rcp_list;
+ u16 num_recps = ICE_MAX_NUM_RECIPES;
+ int status;
+
+ rcp_list = kcalloc(num_recps, sizeof(*rcp_list), GFP_KERNEL);
+ if (!rcp_list)
+ return -ENOMEM;
+
+ /* read current recipe list from firmware */
+ rcp_list->recipe_indx = params->rid;
+ status = ice_aq_get_recipe(hw, rcp_list, &num_recps, params->rid, NULL);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SW, "Failed to get recipe %d, status %d\n",
+ params->rid, status);
+ goto error_out;
+ }
+
+	/* only modify the existing recipe's lkup_idx and mask if valid, while
+	 * leaving all other fields the same, then update the recipe in
+	 * firmware
+	 */
+ rcp_list->content.lkup_indx[params->lkup_idx] = params->fv_idx;
+ if (params->mask_valid)
+ rcp_list->content.mask[params->lkup_idx] =
+ cpu_to_le16(params->mask);
+
+ if (params->ignore_valid)
+ rcp_list->content.lkup_indx[params->lkup_idx] |=
+ ICE_AQ_RECIPE_LKUP_IGNORE;
+
+ status = ice_aq_add_recipe(hw, &rcp_list[0], 1, NULL);
+ if (status)
+ ice_debug(hw, ICE_DBG_SW, "Failed to update recipe %d lkup_idx %d fv_idx %d mask %d mask_valid %s, status %d\n",
+ params->rid, params->lkup_idx, params->fv_idx,
+ params->mask, params->mask_valid ? "true" : "false",
+ status);
+
+error_out:
+ kfree(rcp_list);
+ return status;
+}
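
A usage sketch for the helper above; the field names follow the params
accesses in the function body, and the values are purely illustrative:

	struct ice_update_recipe_lkup_idx_params params = {
		.rid = 10,		/* recipe ID to patch */
		.lkup_idx = 2,		/* lookup slot to rewrite */
		.fv_idx = 8,		/* new field vector index */
		.mask = 0xffff,		/* mask to program ... */
		.mask_valid = true,	/* ... and mark as valid */
		.ignore_valid = false,
	};
	int err;

	err = ice_update_recipe_lkup_idx(hw, &params);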
+
+/**
+ * ice_aq_map_recipe_to_profile - Map recipe to packet profile
+ * @hw: pointer to the HW struct
+ * @profile_id: package profile ID to associate the recipes with
+ * @r_bitmap: bitmap of recipes to associate with the profile
+ * @cd: pointer to command details structure or NULL
+ *
+ * Recipe to profile association (0x0291)
+ */
+static int
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_recipe_to_profile *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.recipe_to_profile;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_recipe_to_profile);
+ cmd->profile_id = cpu_to_le16(profile_id);
+ /* Set the recipe ID bit in the bitmask to let the device know which
+ * profile we are associating the recipe to
+ */
+ memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc));
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_get_recipe_to_profile - Get recipe to profile association
+ * @hw: pointer to the HW struct
+ * @profile_id: package profile ID for which to get the associated recipes
+ * @r_bitmap: recipe bitmap filled in and returned as the response
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the recipes associated with the given profile ID (0x0293)
+ */
+static int
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_recipe_to_profile *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ cmd = &desc.params.recipe_to_profile;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_recipe_to_profile);
+ cmd->profile_id = cpu_to_le16(profile_id);
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+ if (!status)
+ memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc));
+
+ return status;
+}
+
+/**
+ * ice_alloc_recipe - add recipe resource
+ * @hw: pointer to the hardware structure
+ * @rid: recipe ID returned as response to AQ call
+ */
+static int ice_alloc_recipe(struct ice_hw *hw, u16 *rid)
+{
+ struct ice_aqc_alloc_free_res_elem *sw_buf;
+ u16 buf_len;
+ int status;
+
+ buf_len = struct_size(sw_buf, elem, 1);
+ sw_buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!sw_buf)
+ return -ENOMEM;
+
+ sw_buf->num_elems = cpu_to_le16(1);
+ sw_buf->res_type = cpu_to_le16((ICE_AQC_RES_TYPE_RECIPE <<
+ ICE_AQC_RES_TYPE_S) |
+ ICE_AQC_RES_TYPE_FLAG_SHARED);
+ status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len,
+ ice_aqc_opc_alloc_res, NULL);
+ if (!status)
+ *rid = le16_to_cpu(sw_buf->elem[0].e.sw_resp);
+ kfree(sw_buf);
+
+ return status;
+}
+
+/**
+ * ice_get_recp_to_prof_map - updates recipe to profile mapping
+ * @hw: pointer to hardware structure
+ *
+ * This function is used to populate recipe_to_profile matrix where index to
+ * this array is the recipe ID and the element is the mapping of which profiles
+ * is this recipe mapped to.
+ */
+static void ice_get_recp_to_prof_map(struct ice_hw *hw)
+{
+ DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+ u16 i;
+
+ for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) {
+ u16 j;
+
+ bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES);
+ bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES);
+ if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL))
+ continue;
+ bitmap_copy(profile_to_recipe[i], r_bitmap,
+ ICE_MAX_NUM_RECIPES);
+ for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES)
+ set_bit(i, recipe_to_profile[j]);
+ }
+}
+
+/**
+ * ice_collect_result_idx - copy result index values
+ * @buf: buffer that contains the result index
+ * @recp: the recipe struct to copy data into
+ */
+static void
+ice_collect_result_idx(struct ice_aqc_recipe_data_elem *buf,
+ struct ice_sw_recipe *recp)
+{
+ if (buf->content.result_indx & ICE_AQ_RECIPE_RESULT_EN)
+ set_bit(buf->content.result_indx & ~ICE_AQ_RECIPE_RESULT_EN,
+ recp->res_idxs);
+}
+
+/**
+ * ice_get_recp_frm_fw - update SW bookkeeping from FW recipe entries
+ * @hw: pointer to hardware structure
+ * @recps: struct that we need to populate
+ * @rid: recipe ID that we are populating
+ * @refresh_required: true if we should get recipe to profile mapping from FW
+ *
+ * This function is used to populate all the necessary entries into our
+ * bookkeeping so that we have a current list of all the recipes that are
+ * programmed in the firmware.
+ */
+static int
+ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid,
+ bool *refresh_required)
+{
+ DECLARE_BITMAP(result_bm, ICE_MAX_FV_WORDS);
+ struct ice_aqc_recipe_data_elem *tmp;
+ u16 num_recps = ICE_MAX_NUM_RECIPES;
+ struct ice_prot_lkup_ext *lkup_exts;
+ u8 fv_word_idx = 0;
+ u16 sub_recps;
+ int status;
+
+ bitmap_zero(result_bm, ICE_MAX_FV_WORDS);
+
+ /* we need a buffer big enough to accommodate all the recipes */
+ tmp = kcalloc(ICE_MAX_NUM_RECIPES, sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ tmp[0].recipe_indx = rid;
+ status = ice_aq_get_recipe(hw, tmp, &num_recps, rid, NULL);
+ /* non-zero status meaning recipe doesn't exist */
+ if (status)
+ goto err_unroll;
+
+	/* Get the recipe-to-profile map so that we can get the field vector
+	 * from the lookups that we read for a recipe from FW. Since we want to
+	 * minimize the number of FW calls, just make one call and cache the
+	 * copy until a new recipe is added. This operation is only required the
+	 * first time, to pick up the changes from FW; when searching existing
+	 * entries we don't need to update the cache again until another recipe
+	 * gets added.
+	 */
+ if (*refresh_required) {
+ ice_get_recp_to_prof_map(hw);
+ *refresh_required = false;
+ }
+
+ /* Start populating all the entries for recps[rid] based on lkups from
+ * firmware. Note that we are only creating the root recipe in our
+ * database.
+ */
+ lkup_exts = &recps[rid].lkup_exts;
+
+ for (sub_recps = 0; sub_recps < num_recps; sub_recps++) {
+ struct ice_aqc_recipe_data_elem root_bufs = tmp[sub_recps];
+ struct ice_recp_grp_entry *rg_entry;
+ u8 i, prof, idx, prot = 0;
+ bool is_root;
+ u16 off = 0;
+
+ rg_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rg_entry),
+ GFP_KERNEL);
+ if (!rg_entry) {
+ status = -ENOMEM;
+ goto err_unroll;
+ }
+
+ idx = root_bufs.recipe_indx;
+ is_root = root_bufs.content.rid & ICE_AQ_RECIPE_ID_IS_ROOT;
+
+ /* Mark all result indices in this chain */
+ if (root_bufs.content.result_indx & ICE_AQ_RECIPE_RESULT_EN)
+ set_bit(root_bufs.content.result_indx & ~ICE_AQ_RECIPE_RESULT_EN,
+ result_bm);
+
+ /* get the first profile that is associated with rid */
+ prof = find_first_bit(recipe_to_profile[idx],
+ ICE_MAX_NUM_PROFILES);
+ for (i = 0; i < ICE_NUM_WORDS_RECIPE; i++) {
+ u8 lkup_indx = root_bufs.content.lkup_indx[i + 1];
+
+ rg_entry->fv_idx[i] = lkup_indx;
+ rg_entry->fv_mask[i] =
+ le16_to_cpu(root_bufs.content.mask[i + 1]);
+
+			/* If the recipe is a chained recipe, all of its child
+			 * recipes' results will have a result index. To fill
+			 * fv_words we should not use those result indices; we
+			 * only need the protocol IDs and offsets. Skip every
+			 * fv_idx that stores a result index, as well as any
+			 * fv_idx that holds ICE_AQ_RECIPE_LKUP_IGNORE or 0,
+			 * since those aren't valid offset values.
+			 */
+ if (test_bit(rg_entry->fv_idx[i], hw->switch_info->prof_res_bm[prof]) ||
+ rg_entry->fv_idx[i] & ICE_AQ_RECIPE_LKUP_IGNORE ||
+ rg_entry->fv_idx[i] == 0)
+ continue;
+
+ ice_find_prot_off(hw, ICE_BLK_SW, prof,
+ rg_entry->fv_idx[i], &prot, &off);
+ lkup_exts->fv_words[fv_word_idx].prot_id = prot;
+ lkup_exts->fv_words[fv_word_idx].off = off;
+ lkup_exts->field_mask[fv_word_idx] =
+ rg_entry->fv_mask[i];
+ fv_word_idx++;
+ }
+ /* populate rg_list with the data from the child entry of this
+ * recipe
+ */
+ list_add(&rg_entry->l_entry, &recps[rid].rg_list);
+
+ /* Propagate some data to the recipe database */
+ recps[idx].is_root = !!is_root;
+ recps[idx].priority = root_bufs.content.act_ctrl_fwd_priority;
+ bitmap_zero(recps[idx].res_idxs, ICE_MAX_FV_WORDS);
+ if (root_bufs.content.result_indx & ICE_AQ_RECIPE_RESULT_EN) {
+ recps[idx].chain_idx = root_bufs.content.result_indx &
+ ~ICE_AQ_RECIPE_RESULT_EN;
+ set_bit(recps[idx].chain_idx, recps[idx].res_idxs);
+ } else {
+ recps[idx].chain_idx = ICE_INVAL_CHAIN_IND;
+ }
+
+ if (!is_root)
+ continue;
+
+ /* Only do the following for root recipes entries */
+ memcpy(recps[idx].r_bitmap, root_bufs.recipe_bitmap,
+ sizeof(recps[idx].r_bitmap));
+ recps[idx].root_rid = root_bufs.content.rid &
+ ~ICE_AQ_RECIPE_ID_IS_ROOT;
+ recps[idx].priority = root_bufs.content.act_ctrl_fwd_priority;
+ }
+
+ /* Complete initialization of the root recipe entry */
+ lkup_exts->n_val_words = fv_word_idx;
+ recps[rid].big_recp = (num_recps > 1);
+ recps[rid].n_grp_count = (u8)num_recps;
+ recps[rid].root_buf = devm_kmemdup(ice_hw_to_dev(hw), tmp,
+ recps[rid].n_grp_count * sizeof(*recps[rid].root_buf),
+ GFP_KERNEL);
+ if (!recps[rid].root_buf) {
+ status = -ENOMEM;
+ goto err_unroll;
+ }
+
+ /* Copy result indexes */
+ bitmap_copy(recps[rid].res_idxs, result_bm, ICE_MAX_FV_WORDS);
+ recps[rid].recp_created = true;
+
+err_unroll:
+ kfree(tmp);
+ return status;
+}
+
+/**
+ * ice_init_port_info - Initialize port_info with switch configuration data
+ * @pi: pointer to port_info
+ * @vsi_port_num: VSI number or port number
+ * @type: Type of switch element (port or VSI)
+ * @swid: switch ID of the switch the element is attached to
+ * @pf_vf_num: PF or VF number
+ * @is_vf: true if the element is a VF, false otherwise
+ */
+static void
+ice_init_port_info(struct ice_port_info *pi, u16 vsi_port_num, u8 type,
+ u16 swid, u16 pf_vf_num, bool is_vf)
+{
+ switch (type) {
+ case ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT:
+ pi->lport = (u8)(vsi_port_num & ICE_LPORT_MASK);
+ pi->sw_id = swid;
+ pi->pf_vf_num = pf_vf_num;
+ pi->is_vf = is_vf;
+ break;
+ default:
+ ice_debug(pi->hw, ICE_DBG_SW, "incorrect VSI/port type received\n");
+ break;
+ }
+}
+
+/**
+ * ice_get_initial_sw_cfg - Get initial port and default VSI data
+ * @hw: pointer to the hardware structure
+ */
+int ice_get_initial_sw_cfg(struct ice_hw *hw)
+{
+ struct ice_aqc_get_sw_cfg_resp_elem *rbuf;
+ u16 req_desc = 0;
+ u16 num_elems;
+ int status;
+ u16 i;
+
+ rbuf = kzalloc(ICE_SW_CFG_MAX_BUF_LEN, GFP_KERNEL);
+ if (!rbuf)
+ return -ENOMEM;
+
+	/* Multiple calls to ice_aq_get_sw_cfg may be required
+	 * to get all the switch configuration information. The need
+	 * for additional calls is indicated by ice_aq_get_sw_cfg
+	 * writing a non-zero value in req_desc.
+	 */
+ do {
+ struct ice_aqc_get_sw_cfg_resp_elem *ele;
+
+ status = ice_aq_get_sw_cfg(hw, rbuf, ICE_SW_CFG_MAX_BUF_LEN,
+ &req_desc, &num_elems, NULL);
+
+ if (status)
+ break;
+
+ for (i = 0, ele = rbuf; i < num_elems; i++, ele++) {
+ u16 pf_vf_num, swid, vsi_port_num;
+ bool is_vf = false;
+ u8 res_type;
+
+ vsi_port_num = le16_to_cpu(ele->vsi_port_num) &
+ ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M;
+
+ pf_vf_num = le16_to_cpu(ele->pf_vf_num) &
+ ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M;
+
+ swid = le16_to_cpu(ele->swid);
+
+ if (le16_to_cpu(ele->pf_vf_num) &
+ ICE_AQC_GET_SW_CONF_RESP_IS_VF)
+ is_vf = true;
+
+ res_type = (u8)(le16_to_cpu(ele->vsi_port_num) >>
+ ICE_AQC_GET_SW_CONF_RESP_TYPE_S);
+
+ if (res_type == ICE_AQC_GET_SW_CONF_RESP_VSI) {
+ /* FW VSI is not needed. Just continue. */
+ continue;
+ }
+
+ ice_init_port_info(hw->port_info, vsi_port_num,
+ res_type, swid, pf_vf_num, is_vf);
+ }
+ } while (req_desc && !status);
+
+ kfree(rbuf);
+ return status;
+}
+
+/**
+ * ice_fill_sw_info - Helper function to populate lb_en and lan_en
+ * @hw: pointer to the hardware structure
+ * @fi: filter info structure to fill/update
+ *
+ * This helper function populates the lb_en and lan_en elements of the provided
+ * ice_fltr_info struct using the switch's type and characteristics of the
+ * switch rule being configured.
+ */
+static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *fi)
+{
+ fi->lb_en = false;
+ fi->lan_en = false;
+ if ((fi->flag & ICE_FLTR_TX) &&
+ (fi->fltr_act == ICE_FWD_TO_VSI ||
+ fi->fltr_act == ICE_FWD_TO_VSI_LIST ||
+ fi->fltr_act == ICE_FWD_TO_Q ||
+ fi->fltr_act == ICE_FWD_TO_QGRP)) {
+		/* Setting LB for prune actions would result in packets being
+		 * replicated to the internal switch, where they would then be
+		 * dropped.
+		 */
+ if (fi->lkup_type != ICE_SW_LKUP_VLAN)
+ fi->lb_en = true;
+
+		/* Set lan_en to TRUE if
+		 * 1. The switch is a VEB AND
+		 * 2. at least one of the following is true:
+		 * 2.1 The lookup is a directional lookup like ethertype,
+		 *     promiscuous, ethertype-MAC, promiscuous-VLAN
+		 *     or default-port, OR
+		 * 2.2 The lookup is VLAN, OR
+		 * 2.3 The lookup is MAC with mcast or bcast addr for MAC, OR
+		 * 2.4 The lookup is MAC_VLAN with mcast or bcast addr for MAC.
+		 *
+		 * OR
+		 *
+		 * The switch is a VEPA.
+		 *
+		 * In all other cases, the LAN enable has to be set to false.
+		 */
+ if (hw->evb_veb) {
+ if (fi->lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+ fi->lkup_type == ICE_SW_LKUP_PROMISC ||
+ fi->lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+ fi->lkup_type == ICE_SW_LKUP_PROMISC_VLAN ||
+ fi->lkup_type == ICE_SW_LKUP_DFLT ||
+ fi->lkup_type == ICE_SW_LKUP_VLAN ||
+ (fi->lkup_type == ICE_SW_LKUP_MAC &&
+ !is_unicast_ether_addr(fi->l_data.mac.mac_addr)) ||
+ (fi->lkup_type == ICE_SW_LKUP_MAC_VLAN &&
+ !is_unicast_ether_addr(fi->l_data.mac.mac_addr)))
+ fi->lan_en = true;
+ } else {
+ fi->lan_en = true;
+ }
+ }
+}
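+
+/* Worked example of the policy above: in VEB mode, a Tx ICE_SW_LKUP_MAC
+ * forward-to-VSI filter for a unicast address matches none of cases 2.1-2.4,
+ * so lan_en stays false and the packet is only looped back internally (lb_en
+ * was set because the lookup is not VLAN). The same filter for a broadcast
+ * address hits case 2.3 and additionally sets lan_en, letting the packet
+ * reach the wire.
+ */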
+
+/**
+ * ice_fill_sw_rule - Helper function to fill switch rule structure
+ * @hw: pointer to the hardware structure
+ * @f_info: entry containing packet forwarding information
+ * @s_rule: switch rule structure to be filled in based on f_info
+ * @opc: switch rules population command type - pass in the command opcode
+ */
+static void
+ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
+ struct ice_sw_rule_lkup_rx_tx *s_rule,
+ enum ice_adminq_opc opc)
+{
+ u16 vlan_id = ICE_MAX_VLAN_ID + 1;
+ u16 vlan_tpid = ETH_P_8021Q;
+ void *daddr = NULL;
+ u16 eth_hdr_sz;
+ u8 *eth_hdr;
+ u32 act = 0;
+ __be16 *off;
+ u8 q_rgn;
+
+ if (opc == ice_aqc_opc_remove_sw_rules) {
+ s_rule->act = 0;
+ s_rule->index = cpu_to_le16(f_info->fltr_rule_id);
+ s_rule->hdr_len = 0;
+ return;
+ }
+
+ eth_hdr_sz = sizeof(dummy_eth_header);
+ eth_hdr = s_rule->hdr_data;
+
+ /* initialize the ether header with a dummy header */
+ memcpy(eth_hdr, dummy_eth_header, eth_hdr_sz);
+ ice_fill_sw_info(hw, f_info);
+
+ switch (f_info->fltr_act) {
+ case ICE_FWD_TO_VSI:
+ act |= (f_info->fwd_id.hw_vsi_id << ICE_SINGLE_ACT_VSI_ID_S) &
+ ICE_SINGLE_ACT_VSI_ID_M;
+ if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
+ act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+ ICE_SINGLE_ACT_VALID_BIT;
+ break;
+ case ICE_FWD_TO_VSI_LIST:
+ act |= ICE_SINGLE_ACT_VSI_LIST;
+ act |= (f_info->fwd_id.vsi_list_id <<
+ ICE_SINGLE_ACT_VSI_LIST_ID_S) &
+ ICE_SINGLE_ACT_VSI_LIST_ID_M;
+ if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
+ act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+ ICE_SINGLE_ACT_VALID_BIT;
+ break;
+ case ICE_FWD_TO_Q:
+ act |= ICE_SINGLE_ACT_TO_Q;
+ act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+ ICE_SINGLE_ACT_Q_INDEX_M;
+ break;
+ case ICE_DROP_PACKET:
+ act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP |
+ ICE_SINGLE_ACT_VALID_BIT;
+ break;
+ case ICE_FWD_TO_QGRP:
+ q_rgn = f_info->qgrp_size > 0 ?
+ (u8)ilog2(f_info->qgrp_size) : 0;
+ act |= ICE_SINGLE_ACT_TO_Q;
+ act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+ ICE_SINGLE_ACT_Q_INDEX_M;
+ act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) &
+ ICE_SINGLE_ACT_Q_REGION_M;
+ break;
+ default:
+ return;
+ }
+
+ if (f_info->lb_en)
+ act |= ICE_SINGLE_ACT_LB_ENABLE;
+ if (f_info->lan_en)
+ act |= ICE_SINGLE_ACT_LAN_ENABLE;
+
+ switch (f_info->lkup_type) {
+ case ICE_SW_LKUP_MAC:
+ daddr = f_info->l_data.mac.mac_addr;
+ break;
+ case ICE_SW_LKUP_VLAN:
+ vlan_id = f_info->l_data.vlan.vlan_id;
+ if (f_info->l_data.vlan.tpid_valid)
+ vlan_tpid = f_info->l_data.vlan.tpid;
+ if (f_info->fltr_act == ICE_FWD_TO_VSI ||
+ f_info->fltr_act == ICE_FWD_TO_VSI_LIST) {
+ act |= ICE_SINGLE_ACT_PRUNE;
+ act |= ICE_SINGLE_ACT_EGRESS | ICE_SINGLE_ACT_INGRESS;
+ }
+ break;
+ case ICE_SW_LKUP_ETHERTYPE_MAC:
+ daddr = f_info->l_data.ethertype_mac.mac_addr;
+ fallthrough;
+ case ICE_SW_LKUP_ETHERTYPE:
+ off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
+ *off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype);
+ break;
+ case ICE_SW_LKUP_MAC_VLAN:
+ daddr = f_info->l_data.mac_vlan.mac_addr;
+ vlan_id = f_info->l_data.mac_vlan.vlan_id;
+ break;
+ case ICE_SW_LKUP_PROMISC_VLAN:
+ vlan_id = f_info->l_data.mac_vlan.vlan_id;
+ fallthrough;
+ case ICE_SW_LKUP_PROMISC:
+ daddr = f_info->l_data.mac_vlan.mac_addr;
+ break;
+ default:
+ break;
+ }
+
+ s_rule->hdr.type = (f_info->flag & ICE_FLTR_RX) ?
+ cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX) :
+ cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
+
+ /* Recipe set depending on lookup type */
+ s_rule->recipe_id = cpu_to_le16(f_info->lkup_type);
+ s_rule->src = cpu_to_le16(f_info->src);
+ s_rule->act = cpu_to_le32(act);
+
+ if (daddr)
+ ether_addr_copy(eth_hdr + ICE_ETH_DA_OFFSET, daddr);
+
+	if (vlan_id <= ICE_MAX_VLAN_ID) {
+ off = (__force __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET);
+ *off = cpu_to_be16(vlan_id);
+ off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
+ *off = cpu_to_be16(vlan_tpid);
+ }
+
+ /* Create the switch rule with the final dummy Ethernet header */
+ if (opc != ice_aqc_opc_update_sw_rules)
+ s_rule->hdr_len = cpu_to_le16(eth_hdr_sz);
+}
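+
+/* Illustrative encoding note: for ICE_FWD_TO_Q with q_id == 3, the action
+ * word built above is
+ *
+ *	act = ICE_SINGLE_ACT_TO_Q |
+ *	      ((3 << ICE_SINGLE_ACT_Q_INDEX_S) & ICE_SINGLE_ACT_Q_INDEX_M);
+ *
+ * optionally OR'ed with ICE_SINGLE_ACT_LB_ENABLE and ICE_SINGLE_ACT_LAN_ENABLE
+ * as decided by ice_fill_sw_info().
+ */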
+
+/**
+ * ice_add_marker_act - attach a software marker via a large action
+ * @hw: pointer to the hardware structure
+ * @m_ent: the management entry for which the software marker needs to be added
+ * @sw_marker: software marker to tag the Rx descriptor with
+ * @l_id: large action resource ID
+ *
+ * Create a large action to hold the software marker and update the switch rule
+ * entry pointed to by m_ent with the newly created large action.
+ */
+static int
+ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent,
+ u16 sw_marker, u16 l_id)
+{
+ struct ice_sw_rule_lkup_rx_tx *rx_tx;
+ struct ice_sw_rule_lg_act *lg_act;
+ /* For software marker we need 3 large actions
+ * 1. FWD action: FWD TO VSI or VSI LIST
+ * 2. GENERIC VALUE action to hold the profile ID
+ * 3. GENERIC VALUE action to hold the software marker ID
+ */
+ const u16 num_lg_acts = 3;
+ u16 lg_act_size;
+ u16 rules_size;
+ int status;
+ u32 act;
+ u16 id;
+
+ if (m_ent->fltr_info.lkup_type != ICE_SW_LKUP_MAC)
+ return -EINVAL;
+
+ /* Create two back-to-back switch rules and submit them to the HW using
+ * one memory buffer:
+ * 1. Large Action
+	 *    2. Lookup Tx/Rx
+ */
+ lg_act_size = (u16)ICE_SW_RULE_LG_ACT_SIZE(lg_act, num_lg_acts);
+ rules_size = lg_act_size + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(rx_tx);
+ lg_act = devm_kzalloc(ice_hw_to_dev(hw), rules_size, GFP_KERNEL);
+ if (!lg_act)
+ return -ENOMEM;
+
+ rx_tx = (typeof(rx_tx))((u8 *)lg_act + lg_act_size);
+
+ /* Fill in the first switch rule i.e. large action */
+ lg_act->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LG_ACT);
+ lg_act->index = cpu_to_le16(l_id);
+ lg_act->size = cpu_to_le16(num_lg_acts);
+
+ /* First action VSI forwarding or VSI list forwarding depending on how
+ * many VSIs
+ */
+ id = (m_ent->vsi_count > 1) ? m_ent->fltr_info.fwd_id.vsi_list_id :
+ m_ent->fltr_info.fwd_id.hw_vsi_id;
+
+ act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT;
+ act |= (id << ICE_LG_ACT_VSI_LIST_ID_S) & ICE_LG_ACT_VSI_LIST_ID_M;
+ if (m_ent->vsi_count > 1)
+ act |= ICE_LG_ACT_VSI_LIST;
+ lg_act->act[0] = cpu_to_le32(act);
+
+ /* Second action descriptor type */
+ act = ICE_LG_ACT_GENERIC;
+
+ act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M;
+ lg_act->act[1] = cpu_to_le32(act);
+
+ act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX <<
+ ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M;
+
+ /* Third action Marker value */
+ act |= ICE_LG_ACT_GENERIC;
+ act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) &
+ ICE_LG_ACT_GENERIC_VALUE_M;
+
+ lg_act->act[2] = cpu_to_le32(act);
+
+	/* Call the fill switch rule helper to fill the lookup Tx/Rx structure */
+ ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx,
+ ice_aqc_opc_update_sw_rules);
+
+ /* Update the action to point to the large action ID */
+ rx_tx->act = cpu_to_le32(ICE_SINGLE_ACT_PTR |
+ ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) &
+ ICE_SINGLE_ACT_PTR_VAL_M));
+
+	/* Use the filter rule ID of the previously created rule with a single
+	 * act. Once the update happens, hardware will treat this as a large
+	 * action.
+	 */
+ rx_tx->index = cpu_to_le16(m_ent->fltr_info.fltr_rule_id);
+
+ status = ice_aq_sw_rules(hw, lg_act, rules_size, 2,
+ ice_aqc_opc_update_sw_rules, NULL);
+ if (!status) {
+ m_ent->lg_act_idx = l_id;
+ m_ent->sw_marker_id = sw_marker;
+ }
+
+ devm_kfree(ice_hw_to_dev(hw), lg_act);
+ return status;
+}
+
+/**
+ * ice_create_vsi_list_map - create a VSI list ID to VSI mapping entry
+ * @hw: pointer to the hardware structure
+ * @vsi_handle_arr: array of VSI handles to set in the VSI mapping
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
+ *
+ * Helper function to create a new entry of VSI list ID to VSI mapping
+ * using the given VSI list ID
+ */
+static struct ice_vsi_list_map_info *
+ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
+ u16 vsi_list_id)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_vsi_list_map_info *v_map;
+ int i;
+
+ v_map = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*v_map), GFP_KERNEL);
+ if (!v_map)
+ return NULL;
+
+ v_map->vsi_list_id = vsi_list_id;
+ v_map->ref_cnt = 1;
+ for (i = 0; i < num_vsi; i++)
+ set_bit(vsi_handle_arr[i], v_map->vsi_map);
+
+ list_add(&v_map->list_entry, &sw->vsi_list_map_head);
+ return v_map;
+}
+
+/**
+ * ice_update_vsi_list_rule - add or update a VSI list switch rule
+ * @hw: pointer to the hardware structure
+ * @vsi_handle_arr: array of VSI handles to form a VSI list
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
+ * @remove: Boolean value to indicate if this is a remove action
+ * @opc: switch rules population command type - pass in the command opcode
+ * @lkup_type: lookup type of the filter
+ *
+ * Call AQ command to add a new switch rule or update existing switch rule
+ * using the given VSI list ID
+ */
+static int
+ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
+ u16 vsi_list_id, bool remove, enum ice_adminq_opc opc,
+ enum ice_sw_lkup_type lkup_type)
+{
+ struct ice_sw_rule_vsi_list *s_rule;
+ u16 s_rule_size;
+ u16 rule_type;
+ int status;
+ int i;
+
+ if (!num_vsi)
+ return -EINVAL;
+
+ if (lkup_type == ICE_SW_LKUP_MAC ||
+ lkup_type == ICE_SW_LKUP_MAC_VLAN ||
+ lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+ lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+ lkup_type == ICE_SW_LKUP_PROMISC ||
+ lkup_type == ICE_SW_LKUP_PROMISC_VLAN ||
+ lkup_type == ICE_SW_LKUP_DFLT)
+ rule_type = remove ? ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR :
+ ICE_AQC_SW_RULES_T_VSI_LIST_SET;
+ else if (lkup_type == ICE_SW_LKUP_VLAN)
+ rule_type = remove ? ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR :
+ ICE_AQC_SW_RULES_T_PRUNE_LIST_SET;
+ else
+ return -EINVAL;
+
+ s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, num_vsi);
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+ if (!s_rule)
+ return -ENOMEM;
+ for (i = 0; i < num_vsi; i++) {
+ if (!ice_is_vsi_valid(hw, vsi_handle_arr[i])) {
+ status = -EINVAL;
+ goto exit;
+ }
+ /* AQ call requires hw_vsi_id(s) */
+ s_rule->vsi[i] =
+ cpu_to_le16(ice_get_hw_vsi_num(hw, vsi_handle_arr[i]));
+ }
+
+ s_rule->hdr.type = cpu_to_le16(rule_type);
+ s_rule->number_vsi = cpu_to_le16(num_vsi);
+ s_rule->index = cpu_to_le16(vsi_list_id);
+
+ status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opc, NULL);
+
+exit:
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
+
+/**
+ * ice_create_vsi_list_rule - Creates and populates a VSI list rule
+ * @hw: pointer to the HW struct
+ * @vsi_handle_arr: array of VSI handles to form a VSI list
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: stores the ID of the VSI list to be created
+ * @lkup_type: switch rule filter's lookup type
+ */
+static int
+ice_create_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
+ u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type)
+{
+ int status;
+
+ status = ice_aq_alloc_free_vsi_list(hw, vsi_list_id, lkup_type,
+ ice_aqc_opc_alloc_res);
+ if (status)
+ return status;
+
+ /* Update the newly created VSI list to include the specified VSIs */
+ return ice_update_vsi_list_rule(hw, vsi_handle_arr, num_vsi,
+ *vsi_list_id, false,
+ ice_aqc_opc_add_sw_rules, lkup_type);
+}
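+
+/* Usage sketch (illustrative only): create a two-VSI list, as is done when a
+ * second VSI subscribes to an existing filter; locals are hypothetical.
+ *
+ *	u16 handles[2] = { cur_handle, new_handle };
+ *	u16 vsi_list_id;
+ *	int err;
+ *
+ *	err = ice_create_vsi_list_rule(hw, handles, 2, &vsi_list_id,
+ *				       ICE_SW_LKUP_MAC);
+ */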
+
+/**
+ * ice_create_pkt_fwd_rule - create a packet forwarding rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: entry containing packet forwarding information
+ *
+ * Create switch rule with given filter information and add an entry
+ * to the corresponding filter management list to track this switch rule
+ * and VSI mapping
+ */
+static int
+ice_create_pkt_fwd_rule(struct ice_hw *hw,
+ struct ice_fltr_list_entry *f_entry)
+{
+ struct ice_fltr_mgmt_list_entry *fm_entry;
+ struct ice_sw_rule_lkup_rx_tx *s_rule;
+ enum ice_sw_lkup_type l_type;
+ struct ice_sw_recipe *recp;
+ int status;
+
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+ ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule),
+ GFP_KERNEL);
+ if (!s_rule)
+ return -ENOMEM;
+ fm_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*fm_entry),
+ GFP_KERNEL);
+ if (!fm_entry) {
+ status = -ENOMEM;
+ goto ice_create_pkt_fwd_rule_exit;
+ }
+
+ fm_entry->fltr_info = f_entry->fltr_info;
+
+ /* Initialize all the fields for the management entry */
+ fm_entry->vsi_count = 1;
+ fm_entry->lg_act_idx = ICE_INVAL_LG_ACT_INDEX;
+ fm_entry->sw_marker_id = ICE_INVAL_SW_MARKER_ID;
+ fm_entry->counter_index = ICE_INVAL_COUNTER_ID;
+
+ ice_fill_sw_rule(hw, &fm_entry->fltr_info, s_rule,
+ ice_aqc_opc_add_sw_rules);
+
+ status = ice_aq_sw_rules(hw, s_rule,
+ ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule), 1,
+ ice_aqc_opc_add_sw_rules, NULL);
+ if (status) {
+ devm_kfree(ice_hw_to_dev(hw), fm_entry);
+ goto ice_create_pkt_fwd_rule_exit;
+ }
+
+ f_entry->fltr_info.fltr_rule_id = le16_to_cpu(s_rule->index);
+ fm_entry->fltr_info.fltr_rule_id = le16_to_cpu(s_rule->index);
+
+	/* The bookkeeping entries will get removed when the base driver
+	 * calls the remove filter AQ command
+	 */
+ l_type = fm_entry->fltr_info.lkup_type;
+ recp = &hw->switch_info->recp_list[l_type];
+ list_add(&fm_entry->list_entry, &recp->filt_rules);
+
+ice_create_pkt_fwd_rule_exit:
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
+
+/**
+ * ice_update_pkt_fwd_rule - update a packet forwarding rule
+ * @hw: pointer to the hardware structure
+ * @f_info: filter information for switch rule
+ *
+ * Call AQ command to update a previously created switch rule with a
+ * VSI list ID
+ */
+static int
+ice_update_pkt_fwd_rule(struct ice_hw *hw, struct ice_fltr_info *f_info)
+{
+ struct ice_sw_rule_lkup_rx_tx *s_rule;
+ int status;
+
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+ ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule),
+ GFP_KERNEL);
+ if (!s_rule)
+ return -ENOMEM;
+
+ ice_fill_sw_rule(hw, f_info, s_rule, ice_aqc_opc_update_sw_rules);
+
+ s_rule->index = cpu_to_le16(f_info->fltr_rule_id);
+
+ /* Update switch rule with new rule set to forward VSI list */
+ status = ice_aq_sw_rules(hw, s_rule,
+ ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule), 1,
+ ice_aqc_opc_update_sw_rules, NULL);
+
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
+
+/**
+ * ice_update_sw_rule_bridge_mode - update unicast switch rules for bridge mode
+ * @hw: pointer to the HW struct
+ *
+ * Updates unicast switch filter rules based on VEB/VEPA mode
+ */
+int ice_update_sw_rule_bridge_mode(struct ice_hw *hw)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_mgmt_list_entry *fm_entry;
+ struct list_head *rule_head;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ int status = 0;
+
+ rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
+ rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
+
+ mutex_lock(rule_lock);
+ list_for_each_entry(fm_entry, rule_head, list_entry) {
+ struct ice_fltr_info *fi = &fm_entry->fltr_info;
+ u8 *addr = fi->l_data.mac.mac_addr;
+
+ /* Update unicast Tx rules to reflect the selected
+ * VEB/VEPA mode
+ */
+ if ((fi->flag & ICE_FLTR_TX) && is_unicast_ether_addr(addr) &&
+ (fi->fltr_act == ICE_FWD_TO_VSI ||
+ fi->fltr_act == ICE_FWD_TO_VSI_LIST ||
+ fi->fltr_act == ICE_FWD_TO_Q ||
+ fi->fltr_act == ICE_FWD_TO_QGRP)) {
+ status = ice_update_pkt_fwd_rule(hw, fi);
+ if (status)
+ break;
+ }
+ }
+
+ mutex_unlock(rule_lock);
+
+ return status;
+}
+
+/**
+ * ice_add_update_vsi_list - add or update the VSI list for a filter
+ * @hw: pointer to the hardware structure
+ * @m_entry: pointer to current filter management list entry
+ * @cur_fltr: filter information from the book keeping entry
+ * @new_fltr: filter information with the new VSI to be added
+ *
+ * Call AQ command to add or update previously created VSI list with new VSI.
+ *
+ * Helper function to do the bookkeeping associated with adding filter
+ * information. The algorithm is described below:
+ *	When a VSI needs to subscribe to a given filter (MAC/VLAN/Ethtype etc.)
+ *		if only one VSI has been added till now
+ *			Allocate a new VSI list and add two VSIs
+ *			to this list using switch rule command
+ *			Update the previously created switch rule with the
+ *			newly created VSI list ID
+ *		if a VSI list was previously created
+ *			Add the new VSI to the previously created VSI list set
+ *			using the update switch rule command
+ */
+static int
+ice_add_update_vsi_list(struct ice_hw *hw,
+ struct ice_fltr_mgmt_list_entry *m_entry,
+ struct ice_fltr_info *cur_fltr,
+ struct ice_fltr_info *new_fltr)
+{
+ u16 vsi_list_id = 0;
+ int status = 0;
+
+ if ((cur_fltr->fltr_act == ICE_FWD_TO_Q ||
+ cur_fltr->fltr_act == ICE_FWD_TO_QGRP))
+ return -EOPNOTSUPP;
+
+ if ((new_fltr->fltr_act == ICE_FWD_TO_Q ||
+ new_fltr->fltr_act == ICE_FWD_TO_QGRP) &&
+ (cur_fltr->fltr_act == ICE_FWD_TO_VSI ||
+ cur_fltr->fltr_act == ICE_FWD_TO_VSI_LIST))
+ return -EOPNOTSUPP;
+
+ if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) {
+ /* Only one entry existed in the mapping and it was not already
+ * a part of a VSI list. So, create a VSI list with the old and
+ * new VSIs.
+ */
+ struct ice_fltr_info tmp_fltr;
+ u16 vsi_handle_arr[2];
+
+ /* A rule already exists with the new VSI being added */
+ if (cur_fltr->fwd_id.hw_vsi_id == new_fltr->fwd_id.hw_vsi_id)
+ return -EEXIST;
+
+ vsi_handle_arr[0] = cur_fltr->vsi_handle;
+ vsi_handle_arr[1] = new_fltr->vsi_handle;
+ status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2,
+ &vsi_list_id,
+ new_fltr->lkup_type);
+ if (status)
+ return status;
+
+ tmp_fltr = *new_fltr;
+ tmp_fltr.fltr_rule_id = cur_fltr->fltr_rule_id;
+ tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+ tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+ /* Update the previous switch rule of "MAC forward to VSI" to
+ * "MAC fwd to VSI list"
+ */
+ status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+ if (status)
+ return status;
+
+ cur_fltr->fwd_id.vsi_list_id = vsi_list_id;
+ cur_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
+ m_entry->vsi_list_info =
+ ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
+ vsi_list_id);
+
+ if (!m_entry->vsi_list_info)
+ return -ENOMEM;
+
+ /* If this entry was large action then the large action needs
+ * to be updated to point to FWD to VSI list
+ */
+ if (m_entry->sw_marker_id != ICE_INVAL_SW_MARKER_ID)
+ status =
+ ice_add_marker_act(hw, m_entry,
+ m_entry->sw_marker_id,
+ m_entry->lg_act_idx);
+ } else {
+ u16 vsi_handle = new_fltr->vsi_handle;
+ enum ice_adminq_opc opcode;
+
+ if (!m_entry->vsi_list_info)
+ return -EIO;
+
+ /* A rule already exists with the new VSI being added */
+ if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
+ return 0;
+
+ /* Update the previously created VSI list set with
+ * the new VSI ID passed in
+ */
+ vsi_list_id = cur_fltr->fwd_id.vsi_list_id;
+ opcode = ice_aqc_opc_update_sw_rules;
+
+ status = ice_update_vsi_list_rule(hw, &vsi_handle, 1,
+ vsi_list_id, false, opcode,
+ new_fltr->lkup_type);
+ /* update VSI list mapping info with new VSI ID */
+ if (!status)
+ set_bit(vsi_handle, m_entry->vsi_list_info->vsi_map);
+ }
+ if (!status)
+ m_entry->vsi_count++;
+ return status;
+}
+
+/**
+ * ice_find_rule_entry - Search a rule entry
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type for which the specified rule needs to be searched
+ * @f_info: rule information
+ *
+ * Helper function to search for a given rule entry
+ * Returns pointer to entry storing the rule if found
+ */
+static struct ice_fltr_mgmt_list_entry *
+ice_find_rule_entry(struct ice_hw *hw, u8 recp_id, struct ice_fltr_info *f_info)
+{
+ struct ice_fltr_mgmt_list_entry *list_itr, *ret = NULL;
+ struct ice_switch_info *sw = hw->switch_info;
+ struct list_head *list_head;
+
+ list_head = &sw->recp_list[recp_id].filt_rules;
+ list_for_each_entry(list_itr, list_head, list_entry) {
+ if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data,
+ sizeof(f_info->l_data)) &&
+ f_info->flag == list_itr->fltr_info.flag) {
+ ret = list_itr;
+ break;
+ }
+ }
+ return ret;
+}
+
+/**
+ * ice_find_vsi_list_entry - Search VSI list map with VSI count 1
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type for which VSI lists need to be searched
+ * @vsi_handle: VSI handle to be found in VSI list
+ * @vsi_list_id: VSI list ID found containing vsi_handle
+ *
+ * Helper function to search for a VSI list with a single entry containing the
+ * given VSI handle. This could be extended further to search VSI lists with
+ * more than one vsi_count. Returns a pointer to the VSI list entry if found.
+ */
+static struct ice_vsi_list_map_info *
+ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle,
+ u16 *vsi_list_id)
+{
+ struct ice_vsi_list_map_info *map_info = NULL;
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_mgmt_list_entry *list_itr;
+ struct list_head *list_head;
+
+ list_head = &sw->recp_list[recp_id].filt_rules;
+ list_for_each_entry(list_itr, list_head, list_entry) {
+ if (list_itr->vsi_count == 1 && list_itr->vsi_list_info) {
+ map_info = list_itr->vsi_list_info;
+ if (test_bit(vsi_handle, map_info->vsi_map)) {
+ *vsi_list_id = map_info->vsi_list_id;
+ return map_info;
+ }
+ }
+ }
+ return NULL;
+}
+
+/**
+ * ice_add_rule_internal - add rule for a given lookup type
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type (recipe ID) for which rule has to be added
+ * @f_entry: structure containing MAC forwarding information
+ *
+ * Adds or updates the rule lists for a given recipe
+ */
+static int
+ice_add_rule_internal(struct ice_hw *hw, u8 recp_id,
+ struct ice_fltr_list_entry *f_entry)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_info *new_fltr, *cur_fltr;
+ struct ice_fltr_mgmt_list_entry *m_entry;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ int status = 0;
+
+ if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+ return -EINVAL;
+ f_entry->fltr_info.fwd_id.hw_vsi_id =
+ ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+
+ rule_lock = &sw->recp_list[recp_id].filt_rule_lock;
+
+ mutex_lock(rule_lock);
+ new_fltr = &f_entry->fltr_info;
+ if (new_fltr->flag & ICE_FLTR_RX)
+ new_fltr->src = hw->port_info->lport;
+ else if (new_fltr->flag & ICE_FLTR_TX)
+ new_fltr->src = f_entry->fltr_info.fwd_id.hw_vsi_id;
+
+ m_entry = ice_find_rule_entry(hw, recp_id, new_fltr);
+ if (!m_entry) {
+ mutex_unlock(rule_lock);
+ return ice_create_pkt_fwd_rule(hw, f_entry);
+ }
+
+ cur_fltr = &m_entry->fltr_info;
+ status = ice_add_update_vsi_list(hw, m_entry, cur_fltr, new_fltr);
+ mutex_unlock(rule_lock);
+
+ return status;
+}
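+
+/* Illustrative trace of the helper above: the first VSI to request a given
+ * filter gets a plain forward-to-VSI rule via ice_create_pkt_fwd_rule(); when
+ * a second VSI requests the same filter, ice_find_rule_entry() matches the
+ * existing entry and ice_add_update_vsi_list() converts the rule to
+ * forward-to-VSI-list.
+ */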
+
+/**
+ * ice_remove_vsi_list_rule - remove a VSI list switch rule
+ * @hw: pointer to the hardware structure
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
+ * @lkup_type: switch rule filter lookup type
+ *
+ * The VSI list should be emptied before this function is called to remove the
+ * VSI list.
+ */
+static int
+ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id,
+ enum ice_sw_lkup_type lkup_type)
+{
+ struct ice_sw_rule_vsi_list *s_rule;
+ u16 s_rule_size;
+ int status;
+
+ s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, 0);
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+ if (!s_rule)
+ return -ENOMEM;
+
+ s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR);
+ s_rule->index = cpu_to_le16(vsi_list_id);
+
+ /* Free the vsi_list resource that we allocated. It is assumed that the
+ * list is empty at this point.
+ */
+ status = ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, lkup_type,
+ ice_aqc_opc_free_res);
+
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+ return status;
+}
+
+/**
+ * ice_rem_update_vsi_list - remove a VSI from a filter's VSI list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle of the VSI to remove
+ * @fm_list: filter management entry for which the VSI list management needs to
+ * be done
+ */
+static int
+ice_rem_update_vsi_list(struct ice_hw *hw, u16 vsi_handle,
+ struct ice_fltr_mgmt_list_entry *fm_list)
+{
+ enum ice_sw_lkup_type lkup_type;
+ u16 vsi_list_id;
+ int status = 0;
+
+ if (fm_list->fltr_info.fltr_act != ICE_FWD_TO_VSI_LIST ||
+ fm_list->vsi_count == 0)
+ return -EINVAL;
+
+ /* A rule with the VSI being removed does not exist */
+ if (!test_bit(vsi_handle, fm_list->vsi_list_info->vsi_map))
+ return -ENOENT;
+
+ lkup_type = fm_list->fltr_info.lkup_type;
+ vsi_list_id = fm_list->fltr_info.fwd_id.vsi_list_id;
+ status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, vsi_list_id, true,
+ ice_aqc_opc_update_sw_rules,
+ lkup_type);
+ if (status)
+ return status;
+
+ fm_list->vsi_count--;
+ clear_bit(vsi_handle, fm_list->vsi_list_info->vsi_map);
+
+ if (fm_list->vsi_count == 1 && lkup_type != ICE_SW_LKUP_VLAN) {
+ struct ice_fltr_info tmp_fltr_info = fm_list->fltr_info;
+ struct ice_vsi_list_map_info *vsi_list_info =
+ fm_list->vsi_list_info;
+ u16 rem_vsi_handle;
+
+ rem_vsi_handle = find_first_bit(vsi_list_info->vsi_map,
+ ICE_MAX_VSI);
+ if (!ice_is_vsi_valid(hw, rem_vsi_handle))
+ return -EIO;
+
+ /* Make sure VSI list is empty before removing it below */
+ status = ice_update_vsi_list_rule(hw, &rem_vsi_handle, 1,
+ vsi_list_id, true,
+ ice_aqc_opc_update_sw_rules,
+ lkup_type);
+ if (status)
+ return status;
+
+ tmp_fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ tmp_fltr_info.fwd_id.hw_vsi_id =
+ ice_get_hw_vsi_num(hw, rem_vsi_handle);
+ tmp_fltr_info.vsi_handle = rem_vsi_handle;
+ status = ice_update_pkt_fwd_rule(hw, &tmp_fltr_info);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SW, "Failed to update pkt fwd rule to FWD_TO_VSI on HW VSI %d, error %d\n",
+ tmp_fltr_info.fwd_id.hw_vsi_id, status);
+ return status;
+ }
+
+ fm_list->fltr_info = tmp_fltr_info;
+ }
+
+ if ((fm_list->vsi_count == 1 && lkup_type != ICE_SW_LKUP_VLAN) ||
+ (fm_list->vsi_count == 0 && lkup_type == ICE_SW_LKUP_VLAN)) {
+ struct ice_vsi_list_map_info *vsi_list_info =
+ fm_list->vsi_list_info;
+
+ /* Remove the VSI list since it is no longer used */
+ status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SW, "Failed to remove VSI list %d, error %d\n",
+ vsi_list_id, status);
+ return status;
+ }
+
+ list_del(&vsi_list_info->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), vsi_list_info);
+ fm_list->vsi_list_info = NULL;
+ }
+
+ return status;
+}
+
+/**
+ * ice_remove_rule_internal - Remove a filter rule of a given type
+ * @hw: pointer to the hardware structure
+ * @recp_id: recipe ID for which the rule needs to be removed
+ * @f_entry: rule entry containing filter information
+ */
+static int
+ice_remove_rule_internal(struct ice_hw *hw, u8 recp_id,
+ struct ice_fltr_list_entry *f_entry)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_mgmt_list_entry *list_elem;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ bool remove_rule = false;
+ u16 vsi_handle;
+ int status = 0;
+
+ if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+ return -EINVAL;
+ f_entry->fltr_info.fwd_id.hw_vsi_id =
+ ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+
+ rule_lock = &sw->recp_list[recp_id].filt_rule_lock;
+ mutex_lock(rule_lock);
+ list_elem = ice_find_rule_entry(hw, recp_id, &f_entry->fltr_info);
+ if (!list_elem) {
+ status = -ENOENT;
+ goto exit;
+ }
+
+ if (list_elem->fltr_info.fltr_act != ICE_FWD_TO_VSI_LIST) {
+ remove_rule = true;
+ } else if (!list_elem->vsi_list_info) {
+ status = -ENOENT;
+ goto exit;
+ } else if (list_elem->vsi_list_info->ref_cnt > 1) {
+ /* a ref_cnt > 1 indicates that the vsi_list is being
+ * shared by multiple rules. Decrement the ref_cnt and
+ * remove this rule, but do not modify the list, as it
+ * is in-use by other rules.
+ */
+ list_elem->vsi_list_info->ref_cnt--;
+ remove_rule = true;
+ } else {
+ /* a ref_cnt of 1 indicates the vsi_list is only used
+ * by one rule. However, the original removal request is only
+ * for a single VSI. Update the vsi_list first, and only
+ * remove the rule if there are no further VSIs in this list.
+ */
+ vsi_handle = f_entry->fltr_info.vsi_handle;
+ status = ice_rem_update_vsi_list(hw, vsi_handle, list_elem);
+ if (status)
+ goto exit;
+ /* if VSI count goes to zero after updating the VSI list */
+ if (list_elem->vsi_count == 0)
+ remove_rule = true;
+ }
+
+ if (remove_rule) {
+ /* Remove the lookup rule */
+ struct ice_sw_rule_lkup_rx_tx *s_rule;
+
+ s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+ ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s_rule),
+ GFP_KERNEL);
+ if (!s_rule) {
+ status = -ENOMEM;
+ goto exit;
+ }
+
+ ice_fill_sw_rule(hw, &list_elem->fltr_info, s_rule,
+ ice_aqc_opc_remove_sw_rules);
+
+ status = ice_aq_sw_rules(hw, s_rule,
+ ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s_rule),
+ 1, ice_aqc_opc_remove_sw_rules, NULL);
+
+		/* Free the buffer used to build the switch rule; the
+		 * bookkeeping entry is removed from the list below
+		 */
+ devm_kfree(ice_hw_to_dev(hw), s_rule);
+
+ if (status)
+ goto exit;
+
+ list_del(&list_elem->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), list_elem);
+ }
+exit:
+ mutex_unlock(rule_lock);
+ return status;
+}
+
+/**
+ * ice_mac_fltr_exist - does this MAC filter exist for given VSI
+ * @hw: pointer to the hardware structure
+ * @mac: MAC address to be checked (for MAC filter)
+ * @vsi_handle: check MAC filter for this VSI
+ */
+bool ice_mac_fltr_exist(struct ice_hw *hw, u8 *mac, u16 vsi_handle)
+{
+ struct ice_fltr_mgmt_list_entry *entry;
+ struct list_head *rule_head;
+ struct ice_switch_info *sw;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ u16 hw_vsi_id;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return false;
+
+ hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+ sw = hw->switch_info;
+ rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
+ if (!rule_head)
+ return false;
+
+ rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
+ mutex_lock(rule_lock);
+ list_for_each_entry(entry, rule_head, list_entry) {
+ struct ice_fltr_info *f_info = &entry->fltr_info;
+ u8 *mac_addr = &f_info->l_data.mac.mac_addr[0];
+
+ if (is_zero_ether_addr(mac_addr))
+ continue;
+
+ if (f_info->flag != ICE_FLTR_TX ||
+ f_info->src_id != ICE_SRC_ID_VSI ||
+ f_info->lkup_type != ICE_SW_LKUP_MAC ||
+ f_info->fltr_act != ICE_FWD_TO_VSI ||
+ hw_vsi_id != f_info->fwd_id.hw_vsi_id)
+ continue;
+
+ if (ether_addr_equal(mac, mac_addr)) {
+ mutex_unlock(rule_lock);
+ return true;
+ }
+ }
+ mutex_unlock(rule_lock);
+ return false;
+}
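+
+/* Usage sketch (illustrative only): avoid re-adding an address that is
+ * already programmed; "hw", "mac" and "vsi_handle" are hypothetical.
+ *
+ *	if (!ice_mac_fltr_exist(hw, mac, vsi_handle))
+ *		build an m_list entry and hand it to ice_add_mac()
+ */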
+
+/**
+ * ice_vlan_fltr_exist - does this VLAN filter exist for given VSI
+ * @hw: pointer to the hardware structure
+ * @vlan_id: VLAN ID
+ * @vsi_handle: check VLAN filter for this VSI
+ */
+bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle)
+{
+ struct ice_fltr_mgmt_list_entry *entry;
+ struct list_head *rule_head;
+ struct ice_switch_info *sw;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ u16 hw_vsi_id;
+
+ if (vlan_id > ICE_MAX_VLAN_ID)
+ return false;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return false;
+
+ hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+ sw = hw->switch_info;
+ rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
+ if (!rule_head)
+ return false;
+
+ rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+ mutex_lock(rule_lock);
+ list_for_each_entry(entry, rule_head, list_entry) {
+ struct ice_fltr_info *f_info = &entry->fltr_info;
+ u16 entry_vlan_id = f_info->l_data.vlan.vlan_id;
+ struct ice_vsi_list_map_info *map_info;
+
+ if (entry_vlan_id > ICE_MAX_VLAN_ID)
+ continue;
+
+ if (f_info->flag != ICE_FLTR_TX ||
+ f_info->src_id != ICE_SRC_ID_VSI ||
+ f_info->lkup_type != ICE_SW_LKUP_VLAN)
+ continue;
+
+		/* The only allowed filter actions are FWD_TO_VSI and FWD_TO_VSI_LIST */
+ if (f_info->fltr_act != ICE_FWD_TO_VSI &&
+ f_info->fltr_act != ICE_FWD_TO_VSI_LIST)
+ continue;
+
+ if (f_info->fltr_act == ICE_FWD_TO_VSI) {
+ if (hw_vsi_id != f_info->fwd_id.hw_vsi_id)
+ continue;
+ } else if (f_info->fltr_act == ICE_FWD_TO_VSI_LIST) {
+ /* If filter_action is FWD_TO_VSI_LIST, make sure
+ * that VSI being checked is part of VSI list
+ */
+ if (entry->vsi_count == 1 &&
+ entry->vsi_list_info) {
+ map_info = entry->vsi_list_info;
+ if (!test_bit(vsi_handle, map_info->vsi_map))
+ continue;
+ }
+ }
+
+ if (vlan_id == entry_vlan_id) {
+ mutex_unlock(rule_lock);
+ return true;
+ }
+ }
+ mutex_unlock(rule_lock);
+
+ return false;
+}
+
+/**
+ * ice_add_mac - Add a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ */
+int ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+ struct ice_fltr_list_entry *m_list_itr;
+ int status = 0;
+
+ if (!m_list || !hw)
+ return -EINVAL;
+
+ list_for_each_entry(m_list_itr, m_list, list_entry) {
+ u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0];
+ u16 vsi_handle;
+ u16 hw_vsi_id;
+
+ m_list_itr->fltr_info.flag = ICE_FLTR_TX;
+ vsi_handle = m_list_itr->fltr_info.vsi_handle;
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+ hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+ m_list_itr->fltr_info.fwd_id.hw_vsi_id = hw_vsi_id;
+ /* update the src in case it is VSI num */
+ if (m_list_itr->fltr_info.src_id != ICE_SRC_ID_VSI)
+ return -EINVAL;
+ m_list_itr->fltr_info.src = hw_vsi_id;
+ if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC ||
+ is_zero_ether_addr(add))
+ return -EINVAL;
+
+ m_list_itr->status = ice_add_rule_internal(hw, ICE_SW_LKUP_MAC,
+ m_list_itr);
+ if (m_list_itr->status)
+ return m_list_itr->status;
+ }
+
+ return status;
+}
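+
+/* Usage sketch (illustrative only): add a single unicast MAC filter. Locals
+ * are hypothetical and error handling is elided.
+ *
+ *	struct ice_fltr_list_entry entry = {};
+ *	LIST_HEAD(mac_list);
+ *
+ *	entry.fltr_info.lkup_type = ICE_SW_LKUP_MAC;
+ *	entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ *	entry.fltr_info.src_id = ICE_SRC_ID_VSI;
+ *	entry.fltr_info.vsi_handle = vsi_handle;
+ *	ether_addr_copy(entry.fltr_info.l_data.mac.mac_addr, mac);
+ *	list_add(&entry.list_entry, &mac_list);
+ *
+ *	err = ice_add_mac(hw, &mac_list);
+ */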
+
+/**
+ * ice_add_vlan_internal - Add one VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: filter entry containing one VLAN information
+ */
+static int
+ice_add_vlan_internal(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_mgmt_list_entry *v_list_itr;
+ struct ice_fltr_info *new_fltr, *cur_fltr;
+ enum ice_sw_lkup_type lkup_type;
+ u16 vsi_list_id = 0, vsi_handle;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ int status = 0;
+
+ if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+ return -EINVAL;
+
+ f_entry->fltr_info.fwd_id.hw_vsi_id =
+ ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+ new_fltr = &f_entry->fltr_info;
+
+ /* VLAN ID should only be 12 bits */
+ if (new_fltr->l_data.vlan.vlan_id > ICE_MAX_VLAN_ID)
+ return -EINVAL;
+
+ if (new_fltr->src_id != ICE_SRC_ID_VSI)
+ return -EINVAL;
+
+ new_fltr->src = new_fltr->fwd_id.hw_vsi_id;
+ lkup_type = new_fltr->lkup_type;
+ vsi_handle = new_fltr->vsi_handle;
+ rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+ mutex_lock(rule_lock);
+ v_list_itr = ice_find_rule_entry(hw, ICE_SW_LKUP_VLAN, new_fltr);
+ if (!v_list_itr) {
+ struct ice_vsi_list_map_info *map_info = NULL;
+
+ if (new_fltr->fltr_act == ICE_FWD_TO_VSI) {
+			/* All VLAN pruning rules use a VSI list. Check if
+			 * there is already a VSI list containing the VSI that
+			 * we want to add. If found, use the same vsi_list_id
+			 * for this new VLAN rule; otherwise create a new list.
+			 */
+ map_info = ice_find_vsi_list_entry(hw, ICE_SW_LKUP_VLAN,
+ vsi_handle,
+ &vsi_list_id);
+ if (!map_info) {
+ status = ice_create_vsi_list_rule(hw,
+ &vsi_handle,
+ 1,
+ &vsi_list_id,
+ lkup_type);
+ if (status)
+ goto exit;
+ }
+ /* Convert the action to forwarding to a VSI list. */
+ new_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
+ new_fltr->fwd_id.vsi_list_id = vsi_list_id;
+ }
+
+ status = ice_create_pkt_fwd_rule(hw, f_entry);
+ if (!status) {
+ v_list_itr = ice_find_rule_entry(hw, ICE_SW_LKUP_VLAN,
+ new_fltr);
+ if (!v_list_itr) {
+ status = -ENOENT;
+ goto exit;
+ }
+ /* reuse VSI list for new rule and increment ref_cnt */
+ if (map_info) {
+ v_list_itr->vsi_list_info = map_info;
+ map_info->ref_cnt++;
+ } else {
+ v_list_itr->vsi_list_info =
+ ice_create_vsi_list_map(hw, &vsi_handle,
+ 1, vsi_list_id);
+ }
+ }
+ } else if (v_list_itr->vsi_list_info->ref_cnt == 1) {
+		/* Update the existing VSI list to add the new VSI ID, but only
+		 * if it is used by exactly one VLAN rule.
+		 */
+ cur_fltr = &v_list_itr->fltr_info;
+ status = ice_add_update_vsi_list(hw, v_list_itr, cur_fltr,
+ new_fltr);
+ } else {
+		/* The VLAN rule exists, but the VSI list it uses is referenced
+		 * by more than one VLAN rule. Create a new VSI list that
+		 * appends the new VSI to the previous one, and update the
+		 * existing VLAN rule to point to the new VSI list ID.
+		 */
+ struct ice_fltr_info tmp_fltr;
+ u16 vsi_handle_arr[2];
+ u16 cur_handle;
+
+		/* The current implementation only supports reusing a VSI list
+		 * with a VSI count of one. We should never hit the condition
+		 * below.
+		 */
+ if (v_list_itr->vsi_count > 1 &&
+ v_list_itr->vsi_list_info->ref_cnt > 1) {
+ ice_debug(hw, ICE_DBG_SW, "Invalid configuration: Optimization to reuse VSI list with more than one VSI is not being done yet\n");
+ status = -EIO;
+ goto exit;
+ }
+
+ cur_handle =
+ find_first_bit(v_list_itr->vsi_list_info->vsi_map,
+ ICE_MAX_VSI);
+
+ /* A rule already exists with the new VSI being added */
+ if (cur_handle == vsi_handle) {
+ status = -EEXIST;
+ goto exit;
+ }
+
+ vsi_handle_arr[0] = cur_handle;
+ vsi_handle_arr[1] = vsi_handle;
+ status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2,
+ &vsi_list_id, lkup_type);
+ if (status)
+ goto exit;
+
+ tmp_fltr = v_list_itr->fltr_info;
+ tmp_fltr.fltr_rule_id = v_list_itr->fltr_info.fltr_rule_id;
+ tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+ tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+ /* Update the previous switch rule to a new VSI list which
+ * includes current VSI that is requested
+ */
+ status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+ if (status)
+ goto exit;
+
+		/* Before overriding the VSI list map info, decrement the
+		 * ref_cnt of the previous VSI list
+		 */
+ v_list_itr->vsi_list_info->ref_cnt--;
+
+ /* now update to newly created list */
+ v_list_itr->fltr_info.fwd_id.vsi_list_id = vsi_list_id;
+ v_list_itr->vsi_list_info =
+ ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
+ vsi_list_id);
+ v_list_itr->vsi_count++;
+ }
+
+exit:
+ mutex_unlock(rule_lock);
+ return status;
+}
+
+/**
+ * ice_add_vlan - Add VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @v_list: list of VLAN entries and forwarding information
+ */
+int ice_add_vlan(struct ice_hw *hw, struct list_head *v_list)
+{
+ struct ice_fltr_list_entry *v_list_itr;
+
+ if (!v_list || !hw)
+ return -EINVAL;
+
+ list_for_each_entry(v_list_itr, v_list, list_entry) {
+ if (v_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_VLAN)
+ return -EINVAL;
+ v_list_itr->fltr_info.flag = ICE_FLTR_TX;
+ v_list_itr->status = ice_add_vlan_internal(hw, v_list_itr);
+ if (v_list_itr->status)
+ return v_list_itr->status;
+ }
+ return 0;
+}
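+
+/* Usage sketch (illustrative only): add one VLAN filter; locals are
+ * hypothetical and error handling is elided.
+ *
+ *	struct ice_fltr_list_entry entry = {};
+ *	LIST_HEAD(vlan_list);
+ *
+ *	entry.fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
+ *	entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ *	entry.fltr_info.src_id = ICE_SRC_ID_VSI;
+ *	entry.fltr_info.vsi_handle = vsi_handle;
+ *	entry.fltr_info.l_data.vlan.vlan_id = vid;
+ *	list_add(&entry.list_entry, &vlan_list);
+ *
+ *	err = ice_add_vlan(hw, &vlan_list);
+ */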
+
+/**
+ * ice_add_eth_mac - Add ethertype and MAC based filter rule
+ * @hw: pointer to the hardware structure
+ * @em_list: list of ethertype and ethertype-MAC filters; MAC is optional
+ *
+ * This function requires the caller to populate the entries in
+ * the filter list with the necessary fields (including flags to
+ * indicate Tx or Rx rules).
+ */
+int ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list)
+{
+ struct ice_fltr_list_entry *em_list_itr;
+
+ if (!em_list || !hw)
+ return -EINVAL;
+
+ list_for_each_entry(em_list_itr, em_list, list_entry) {
+ enum ice_sw_lkup_type l_type =
+ em_list_itr->fltr_info.lkup_type;
+
+ if (l_type != ICE_SW_LKUP_ETHERTYPE_MAC &&
+ l_type != ICE_SW_LKUP_ETHERTYPE)
+ return -EINVAL;
+
+ em_list_itr->status = ice_add_rule_internal(hw, l_type,
+ em_list_itr);
+ if (em_list_itr->status)
+ return em_list_itr->status;
+ }
+ return 0;
+}
+
+/**
+ * ice_remove_eth_mac - Remove an ethertype (or MAC) based filter rule
+ * @hw: pointer to the hardware structure
+ * @em_list: list of ethertype or ethertype MAC entries
+ */
+int ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list)
+{
+ struct ice_fltr_list_entry *em_list_itr, *tmp;
+
+ if (!em_list || !hw)
+ return -EINVAL;
+
+ list_for_each_entry_safe(em_list_itr, tmp, em_list, list_entry) {
+ enum ice_sw_lkup_type l_type =
+ em_list_itr->fltr_info.lkup_type;
+
+ if (l_type != ICE_SW_LKUP_ETHERTYPE_MAC &&
+ l_type != ICE_SW_LKUP_ETHERTYPE)
+ return -EINVAL;
+
+ em_list_itr->status = ice_remove_rule_internal(hw, l_type,
+ em_list_itr);
+ if (em_list_itr->status)
+ return em_list_itr->status;
+ }
+ return 0;
+}
+
+/**
+ * ice_rem_sw_rule_info - delete switch rule bookkeeping entries
+ * @hw: pointer to the hardware structure
+ * @rule_head: pointer to the switch list structure that we want to delete
+ */
+static void
+ice_rem_sw_rule_info(struct ice_hw *hw, struct list_head *rule_head)
+{
+ if (!list_empty(rule_head)) {
+ struct ice_fltr_mgmt_list_entry *entry;
+ struct ice_fltr_mgmt_list_entry *tmp;
+
+ list_for_each_entry_safe(entry, tmp, rule_head, list_entry) {
+ list_del(&entry->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), entry);
+ }
+ }
+}
+
+/**
+ * ice_rem_adv_rule_info - delete advanced switch rule bookkeeping entries
+ * @hw: pointer to the hardware structure
+ * @rule_head: pointer to the switch list structure that we want to delete
+ */
+static void
+ice_rem_adv_rule_info(struct ice_hw *hw, struct list_head *rule_head)
+{
+ struct ice_adv_fltr_mgmt_list_entry *tmp_entry;
+ struct ice_adv_fltr_mgmt_list_entry *lst_itr;
+
+ if (list_empty(rule_head))
+ return;
+
+ list_for_each_entry_safe(lst_itr, tmp_entry, rule_head, list_entry) {
+ list_del(&lst_itr->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), lst_itr->lkups);
+ devm_kfree(ice_hw_to_dev(hw), lst_itr);
+ }
+}
+
+/**
+ * ice_cfg_dflt_vsi - change state of VSI to set/clear default
+ * @pi: pointer to the port_info structure
+ * @vsi_handle: VSI handle to set as default
+ * @set: true to add the above mentioned switch rule, false to remove it
+ * @direction: ICE_FLTR_RX or ICE_FLTR_TX
+ *
+ * Add a filter rule to set/unset the given VSI as the default VSI for the
+ * switch (represented by swid).
+ */
+int
+ice_cfg_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle, bool set,
+ u8 direction)
+{
+ struct ice_fltr_list_entry f_list_entry;
+ struct ice_fltr_info f_info;
+ struct ice_hw *hw = pi->hw;
+ u16 hw_vsi_id;
+ int status;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+ memset(&f_info, 0, sizeof(f_info));
+
+ f_info.lkup_type = ICE_SW_LKUP_DFLT;
+ f_info.flag = direction;
+ f_info.fltr_act = ICE_FWD_TO_VSI;
+ f_info.fwd_id.hw_vsi_id = hw_vsi_id;
+ f_info.vsi_handle = vsi_handle;
+
+ if (f_info.flag & ICE_FLTR_RX) {
+ f_info.src = hw->port_info->lport;
+ f_info.src_id = ICE_SRC_ID_LPORT;
+ } else if (f_info.flag & ICE_FLTR_TX) {
+ f_info.src_id = ICE_SRC_ID_VSI;
+ f_info.src = hw_vsi_id;
+ }
+ f_list_entry.fltr_info = f_info;
+
+ if (set)
+ status = ice_add_rule_internal(hw, ICE_SW_LKUP_DFLT,
+ &f_list_entry);
+ else
+ status = ice_remove_rule_internal(hw, ICE_SW_LKUP_DFLT,
+ &f_list_entry);
+
+ return status;
+}
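+
+/* Usage sketch (illustrative only): make a VSI the default destination for
+ * otherwise unmatched Rx traffic, and later undo it.
+ *
+ *	err = ice_cfg_dflt_vsi(pi, vsi_handle, true, ICE_FLTR_RX);
+ *	...
+ *	err = ice_cfg_dflt_vsi(pi, vsi_handle, false, ICE_FLTR_RX);
+ */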
+
+/**
+ * ice_vsi_uses_fltr - Determine if given VSI uses specified filter
+ * @fm_entry: filter entry to inspect
+ * @vsi_handle: VSI handle to compare with filter info
+ */
+static bool
+ice_vsi_uses_fltr(struct ice_fltr_mgmt_list_entry *fm_entry, u16 vsi_handle)
+{
+ return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI &&
+ fm_entry->fltr_info.vsi_handle == vsi_handle) ||
+ (fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST &&
+ fm_entry->vsi_list_info &&
+ (test_bit(vsi_handle, fm_entry->vsi_list_info->vsi_map))));
+}
+
+/**
+ * ice_check_if_dflt_vsi - check if VSI is default VSI
+ * @pi: pointer to the port_info structure
+ * @vsi_handle: VSI handle to check for in the filter list
+ * @rule_exists: indicates if there are any VSIs in the rule list
+ *
+ * Checks if the VSI is in a default VSI list, and also indicates
+ * if the default VSI list is empty.
+ */
+bool
+ice_check_if_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle,
+ bool *rule_exists)
+{
+ struct ice_fltr_mgmt_list_entry *fm_entry;
+ struct ice_sw_recipe *recp_list;
+ struct list_head *rule_head;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ bool ret = false;
+
+ recp_list = &pi->hw->switch_info->recp_list[ICE_SW_LKUP_DFLT];
+ rule_lock = &recp_list->filt_rule_lock;
+ rule_head = &recp_list->filt_rules;
+
+ mutex_lock(rule_lock);
+
+ if (rule_exists && !list_empty(rule_head))
+ *rule_exists = true;
+
+ list_for_each_entry(fm_entry, rule_head, list_entry) {
+ if (ice_vsi_uses_fltr(fm_entry, vsi_handle)) {
+ ret = true;
+ break;
+ }
+ }
+
+ mutex_unlock(rule_lock);
+
+ return ret;
+}
+
+/**
+ * ice_remove_mac - remove a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ *
+ * This function removes either a MAC filter rule or a specific VSI from a
+ * VSI list for a multicast MAC address.
+ *
+ * Returns -ENOENT if a given entry was not added by ice_add_mac. Caller should
+ * be aware that this call will only work if all the entries passed into m_list
+ * were added previously. It will not attempt to do a partial remove of entries
+ * that were found.
+ */
+int ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+ struct ice_fltr_list_entry *list_itr, *tmp;
+
+ if (!m_list)
+ return -EINVAL;
+
+ list_for_each_entry_safe(list_itr, tmp, m_list, list_entry) {
+ enum ice_sw_lkup_type l_type = list_itr->fltr_info.lkup_type;
+ u16 vsi_handle;
+
+ if (l_type != ICE_SW_LKUP_MAC)
+ return -EINVAL;
+
+ vsi_handle = list_itr->fltr_info.vsi_handle;
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ list_itr->fltr_info.fwd_id.hw_vsi_id =
+ ice_get_hw_vsi_num(hw, vsi_handle);
+
+ list_itr->status = ice_remove_rule_internal(hw,
+ ICE_SW_LKUP_MAC,
+ list_itr);
+ if (list_itr->status)
+ return list_itr->status;
+ }
+ return 0;
+}
+
+/**
+ * ice_remove_vlan - Remove VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @v_list: list of VLAN entries and forwarding information
+ */
+int ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list)
+{
+ struct ice_fltr_list_entry *v_list_itr, *tmp;
+
+ if (!v_list || !hw)
+ return -EINVAL;
+
+ list_for_each_entry_safe(v_list_itr, tmp, v_list, list_entry) {
+ enum ice_sw_lkup_type l_type = v_list_itr->fltr_info.lkup_type;
+
+ if (l_type != ICE_SW_LKUP_VLAN)
+ return -EINVAL;
+ v_list_itr->status = ice_remove_rule_internal(hw,
+ ICE_SW_LKUP_VLAN,
+ v_list_itr);
+ if (v_list_itr->status)
+ return v_list_itr->status;
+ }
+ return 0;
+}
+
+/**
+ * ice_add_entry_to_vsi_fltr_list - Add copy of fltr_list_entry to remove list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ * @vsi_list_head: pointer to the list to add entry to
+ * @fi: pointer to fltr_info of filter entry to copy & add
+ *
+ * Helper function, used when creating a list of filters to remove from
+ * a specific VSI. The entry added to vsi_list_head is a COPY of the
+ * original filter entry, with the exception of fltr_info.fltr_act and
+ * fltr_info.fwd_id fields. These are set such that later logic can
+ * extract which VSI to remove the fltr from, and pass on that information.
+ */
+static int
+ice_add_entry_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle,
+ struct list_head *vsi_list_head,
+ struct ice_fltr_info *fi)
+{
+ struct ice_fltr_list_entry *tmp;
+
+ /* this memory is freed up in the caller function
+ * once filters for this VSI are removed
+ */
+ tmp = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ tmp->fltr_info = *fi;
+
+ /* Overwrite these fields to indicate which VSI to remove filter from,
+ * so find and remove logic can extract the information from the
+ * list entries. Note that original entries will still have proper
+ * values.
+ */
+ tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ tmp->fltr_info.vsi_handle = vsi_handle;
+ tmp->fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+ list_add(&tmp->list_entry, vsi_list_head);
+
+ return 0;
+}
+
+/**
+ * ice_add_to_vsi_fltr_list - Add VSI filters to the list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ * @lkup_list_head: pointer to the list that has certain lookup type filters
+ * @vsi_list_head: pointer to the list pertaining to VSI with vsi_handle
+ *
+ * Locates all filters in lkup_list_head that are used by the given VSI,
+ * and adds COPIES of those entries to vsi_list_head (intended to be used
+ * to remove the listed filters).
+ * Note that this means all entries in vsi_list_head must be explicitly
+ * deallocated by the caller when done with the list.
+ */
+static int
+ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle,
+ struct list_head *lkup_list_head,
+ struct list_head *vsi_list_head)
+{
+ struct ice_fltr_mgmt_list_entry *fm_entry;
+ int status = 0;
+
+ /* check to make sure VSI ID is valid and within boundary */
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ list_for_each_entry(fm_entry, lkup_list_head, list_entry) {
+ if (!ice_vsi_uses_fltr(fm_entry, vsi_handle))
+ continue;
+
+ status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
+ vsi_list_head,
+ &fm_entry->fltr_info);
+ if (status)
+ return status;
+ }
+ return status;
+}
+
+/**
+ * ice_determine_promisc_mask
+ * @fi: filter info to parse
+ *
+ * Helper function to determine which ICE_PROMISC_ mask corresponds
+ * to the given filter info.
+ */
+static u8 ice_determine_promisc_mask(struct ice_fltr_info *fi)
+{
+ u16 vid = fi->l_data.mac_vlan.vlan_id;
+ u8 *macaddr = fi->l_data.mac.mac_addr;
+ bool is_tx_fltr = false;
+ u8 promisc_mask = 0;
+
+ if (fi->flag == ICE_FLTR_TX)
+ is_tx_fltr = true;
+
+ if (is_broadcast_ether_addr(macaddr))
+ promisc_mask |= is_tx_fltr ?
+ ICE_PROMISC_BCAST_TX : ICE_PROMISC_BCAST_RX;
+ else if (is_multicast_ether_addr(macaddr))
+ promisc_mask |= is_tx_fltr ?
+ ICE_PROMISC_MCAST_TX : ICE_PROMISC_MCAST_RX;
+ else if (is_unicast_ether_addr(macaddr))
+ promisc_mask |= is_tx_fltr ?
+ ICE_PROMISC_UCAST_TX : ICE_PROMISC_UCAST_RX;
+ if (vid)
+ promisc_mask |= is_tx_fltr ?
+ ICE_PROMISC_VLAN_TX : ICE_PROMISC_VLAN_RX;
+
+ return promisc_mask;
+}
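+
+/* Illustrative example: a Tx filter (fi->flag == ICE_FLTR_TX) whose DA is a
+ * multicast address and whose VLAN ID is non-zero yields
+ * ICE_PROMISC_MCAST_TX | ICE_PROMISC_VLAN_TX from the helper above.
+ */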
+
+/**
+ * ice_remove_promisc - Remove promisc based filter rules
+ * @hw: pointer to the hardware structure
+ * @recp_id: recipe ID for which the rule needs to be removed
+ * @v_list: list of promisc entries
+ */
+static int
+ice_remove_promisc(struct ice_hw *hw, u8 recp_id, struct list_head *v_list)
+{
+ struct ice_fltr_list_entry *v_list_itr, *tmp;
+
+ list_for_each_entry_safe(v_list_itr, tmp, v_list, list_entry) {
+ v_list_itr->status =
+ ice_remove_rule_internal(hw, recp_id, v_list_itr);
+ if (v_list_itr->status)
+ return v_list_itr->status;
+ }
+ return 0;
+}
+
+/**
+ * ice_clear_vsi_promisc - clear specified promiscuous mode(s) for given VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to clear mode
+ * @promisc_mask: mask of promiscuous config bits to clear
+ * @vid: VLAN ID to clear VLAN promiscuous
+ */
+int
+ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+ u16 vid)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_list_entry *fm_entry, *tmp;
+ struct list_head remove_list_head;
+ struct ice_fltr_mgmt_list_entry *itr;
+ struct list_head *rule_head;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ int status = 0;
+ u8 recipe_id;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+
+ if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX))
+ recipe_id = ICE_SW_LKUP_PROMISC_VLAN;
+ else
+ recipe_id = ICE_SW_LKUP_PROMISC;
+
+ rule_head = &sw->recp_list[recipe_id].filt_rules;
+ rule_lock = &sw->recp_list[recipe_id].filt_rule_lock;
+
+ INIT_LIST_HEAD(&remove_list_head);
+
+ mutex_lock(rule_lock);
+ list_for_each_entry(itr, rule_head, list_entry) {
+ struct ice_fltr_info *fltr_info;
+ u8 fltr_promisc_mask = 0;
+
+ if (!ice_vsi_uses_fltr(itr, vsi_handle))
+ continue;
+ fltr_info = &itr->fltr_info;
+
+ if (recipe_id == ICE_SW_LKUP_PROMISC_VLAN &&
+ vid != fltr_info->l_data.mac_vlan.vlan_id)
+ continue;
+
+ fltr_promisc_mask |= ice_determine_promisc_mask(fltr_info);
+
+ /* Skip if filter is not completely specified by given mask */
+ if (fltr_promisc_mask & ~promisc_mask)
+ continue;
+
+ status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
+ &remove_list_head,
+ fltr_info);
+ if (status) {
+ mutex_unlock(rule_lock);
+ goto free_fltr_list;
+ }
+ }
+ mutex_unlock(rule_lock);
+
+ status = ice_remove_promisc(hw, recipe_id, &remove_list_head);
+
+free_fltr_list:
+ list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
+ list_del(&fm_entry->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), fm_entry);
+ }
+
+ return status;
+}
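+
+/* Illustrative example of the "completely specified" check above: if a
+ * caller clears only ICE_PROMISC_UCAST_RX, a multicast rule in the same
+ * recipe list (computed mask ICE_PROMISC_MCAST_RX) is left in place,
+ * because fltr_promisc_mask & ~promisc_mask is non-zero for that rule.
+ */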
+
+/**
+ * ice_set_vsi_promisc - set given VSI to given promiscuous mode(s)
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to configure
+ * @promisc_mask: mask of promiscuous config bits
+ * @vid: VLAN ID to set VLAN promiscuous
+ */
+int
+ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, u16 vid)
+{
+ enum { UCAST_FLTR = 1, MCAST_FLTR, BCAST_FLTR };
+ struct ice_fltr_list_entry f_list_entry;
+ struct ice_fltr_info new_fltr;
+ bool is_tx_fltr;
+ int status = 0;
+ u16 hw_vsi_id;
+ int pkt_type;
+ u8 recipe_id;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return -EINVAL;
+ hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+ memset(&new_fltr, 0, sizeof(new_fltr));
+
+ if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX)) {
+ new_fltr.lkup_type = ICE_SW_LKUP_PROMISC_VLAN;
+ new_fltr.l_data.mac_vlan.vlan_id = vid;
+ recipe_id = ICE_SW_LKUP_PROMISC_VLAN;
+ } else {
+ new_fltr.lkup_type = ICE_SW_LKUP_PROMISC;
+ recipe_id = ICE_SW_LKUP_PROMISC;
+ }
+
+ /* Separate filters must be set for each direction/packet type
+ * combination, so we will loop over the mask value, store the
+ * individual type, and clear it out in the input mask as it
+ * is found.
+ */
+ while (promisc_mask) {
+ u8 *mac_addr;
+
+ pkt_type = 0;
+ is_tx_fltr = false;
+
+ if (promisc_mask & ICE_PROMISC_UCAST_RX) {
+ promisc_mask &= ~ICE_PROMISC_UCAST_RX;
+ pkt_type = UCAST_FLTR;
+ } else if (promisc_mask & ICE_PROMISC_UCAST_TX) {
+ promisc_mask &= ~ICE_PROMISC_UCAST_TX;
+ pkt_type = UCAST_FLTR;
+ is_tx_fltr = true;
+ } else if (promisc_mask & ICE_PROMISC_MCAST_RX) {
+ promisc_mask &= ~ICE_PROMISC_MCAST_RX;
+ pkt_type = MCAST_FLTR;
+ } else if (promisc_mask & ICE_PROMISC_MCAST_TX) {
+ promisc_mask &= ~ICE_PROMISC_MCAST_TX;
+ pkt_type = MCAST_FLTR;
+ is_tx_fltr = true;
+ } else if (promisc_mask & ICE_PROMISC_BCAST_RX) {
+ promisc_mask &= ~ICE_PROMISC_BCAST_RX;
+ pkt_type = BCAST_FLTR;
+ } else if (promisc_mask & ICE_PROMISC_BCAST_TX) {
+ promisc_mask &= ~ICE_PROMISC_BCAST_TX;
+ pkt_type = BCAST_FLTR;
+ is_tx_fltr = true;
+ }
+
+ /* Check for VLAN promiscuous flag */
+ if (promisc_mask & ICE_PROMISC_VLAN_RX) {
+ promisc_mask &= ~ICE_PROMISC_VLAN_RX;
+ } else if (promisc_mask & ICE_PROMISC_VLAN_TX) {
+ promisc_mask &= ~ICE_PROMISC_VLAN_TX;
+ is_tx_fltr = true;
+ }
+
+ /* Set filter DA based on packet type */
+ mac_addr = new_fltr.l_data.mac.mac_addr;
+ if (pkt_type == BCAST_FLTR) {
+ eth_broadcast_addr(mac_addr);
+ } else if (pkt_type == MCAST_FLTR ||
+ pkt_type == UCAST_FLTR) {
+ /* Use the dummy ether header DA */
+ ether_addr_copy(mac_addr, dummy_eth_header);
+ if (pkt_type == MCAST_FLTR)
+ mac_addr[0] |= 0x1; /* Set multicast bit */
+ }
+
+ /* Need to reset this to zero for all iterations */
+ new_fltr.flag = 0;
+ if (is_tx_fltr) {
+ new_fltr.flag |= ICE_FLTR_TX;
+ new_fltr.src = hw_vsi_id;
+ } else {
+ new_fltr.flag |= ICE_FLTR_RX;
+ new_fltr.src = hw->port_info->lport;
+ }
+
+ new_fltr.fltr_act = ICE_FWD_TO_VSI;
+ new_fltr.vsi_handle = vsi_handle;
+ new_fltr.fwd_id.hw_vsi_id = hw_vsi_id;
+ f_list_entry.fltr_info = new_fltr;
+
+ status = ice_add_rule_internal(hw, recipe_id, &f_list_entry);
+ if (status)
+ goto set_promisc_exit;
+ }
+
+set_promisc_exit:
+ return status;
+}
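+
+/* Usage sketch with a hypothetical caller: to enable Rx unicast and
+ * multicast promiscuous mode on a VSI with no VLAN qualification:
+ *
+ *	err = ice_set_vsi_promisc(hw, vsi_handle,
+ *				  ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX,
+ *				  0);
+ *
+ * The loop above then installs one ICE_SW_LKUP_PROMISC rule per packet
+ * type, i.e. two rules in this case.
+ */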
+
+/**
+ * ice_set_vlan_vsi_promisc
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to configure
+ * @promisc_mask: mask of promiscuous config bits
+ * @rm_vlan_promisc: Clear VLAN promiscuous mode for the VSI's VLANs
+ *
+ * Configure VSI with all associated VLANs to given promiscuous mode(s)
+ */
+int
+ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+ bool rm_vlan_promisc)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_list_entry *list_itr, *tmp;
+ struct list_head vsi_list_head;
+ struct list_head *vlan_head;
+ struct mutex *vlan_lock; /* Lock to protect filter rule list */
+ u16 vlan_id;
+ int status;
+
+ INIT_LIST_HEAD(&vsi_list_head);
+ vlan_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+ vlan_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
+ mutex_lock(vlan_lock);
+ status = ice_add_to_vsi_fltr_list(hw, vsi_handle, vlan_head,
+ &vsi_list_head);
+ mutex_unlock(vlan_lock);
+ if (status)
+ goto free_fltr_list;
+
+ list_for_each_entry(list_itr, &vsi_list_head, list_entry) {
+ /* Avoid enabling or disabling VLAN zero twice when in double
+ * VLAN mode
+ */
+ if (ice_is_dvm_ena(hw) &&
+ list_itr->fltr_info.l_data.vlan.tpid == 0)
+ continue;
+
+ vlan_id = list_itr->fltr_info.l_data.vlan.vlan_id;
+ if (rm_vlan_promisc)
+ status = ice_clear_vsi_promisc(hw, vsi_handle,
+ promisc_mask, vlan_id);
+ else
+ status = ice_set_vsi_promisc(hw, vsi_handle,
+ promisc_mask, vlan_id);
+ if (status && status != -EEXIST)
+ break;
+ }
+
+free_fltr_list:
+ list_for_each_entry_safe(list_itr, tmp, &vsi_list_head, list_entry) {
+ list_del(&list_itr->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), list_itr);
+ }
+ return status;
+}
+
+/**
+ * ice_remove_vsi_lkup_fltr - Remove lookup type filters for a VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ * @lkup: switch rule filter lookup type
+ */
+static void
+ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle,
+ enum ice_sw_lkup_type lkup)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ struct ice_fltr_list_entry *fm_entry;
+ struct list_head remove_list_head;
+ struct list_head *rule_head;
+ struct ice_fltr_list_entry *tmp;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ int status;
+
+ INIT_LIST_HEAD(&remove_list_head);
+ rule_lock = &sw->recp_list[lkup].filt_rule_lock;
+ rule_head = &sw->recp_list[lkup].filt_rules;
+ mutex_lock(rule_lock);
+ status = ice_add_to_vsi_fltr_list(hw, vsi_handle, rule_head,
+ &remove_list_head);
+ mutex_unlock(rule_lock);
+ if (status)
+ goto free_fltr_list;
+
+ switch (lkup) {
+ case ICE_SW_LKUP_MAC:
+ ice_remove_mac(hw, &remove_list_head);
+ break;
+ case ICE_SW_LKUP_VLAN:
+ ice_remove_vlan(hw, &remove_list_head);
+ break;
+ case ICE_SW_LKUP_PROMISC:
+ case ICE_SW_LKUP_PROMISC_VLAN:
+ ice_remove_promisc(hw, lkup, &remove_list_head);
+ break;
+ case ICE_SW_LKUP_MAC_VLAN:
+ case ICE_SW_LKUP_ETHERTYPE:
+ case ICE_SW_LKUP_ETHERTYPE_MAC:
+ case ICE_SW_LKUP_DFLT:
+ case ICE_SW_LKUP_LAST:
+ default:
+ ice_debug(hw, ICE_DBG_SW, "Unsupported lookup type %d\n", lkup);
+ break;
+ }
+
+free_fltr_list:
+ list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
+ list_del(&fm_entry->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), fm_entry);
+ }
+}
+
+/**
+ * ice_remove_vsi_fltr - Remove all filters for a VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ */
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle)
+{
+ ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_MAC);
+ ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_MAC_VLAN);
+ ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_PROMISC);
+ ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_VLAN);
+ ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_DFLT);
+ ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_ETHERTYPE);
+ ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_ETHERTYPE_MAC);
+ ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_PROMISC_VLAN);
+}
+
+/**
+ * ice_alloc_res_cntr - allocate a resource counter
+ * @hw: pointer to the hardware structure
+ * @type: type of resource
+ * @alloc_shared: if set it is shared else dedicated
+ * @num_items: number of entries requested for FD resource type
+ * @counter_id: counter index returned by AQ call
+ */
+int
+ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+ u16 *counter_id)
+{
+ struct ice_aqc_alloc_free_res_elem *buf;
+ u16 buf_len;
+ int status;
+
+ /* Allocate resource */
+ buf_len = struct_size(buf, elem, 1);
+ buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->num_elems = cpu_to_le16(num_items);
+ buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) &
+ ICE_AQC_RES_TYPE_M) | alloc_shared);
+
+ status = ice_aq_alloc_free_res(hw, 1, buf, buf_len,
+ ice_aqc_opc_alloc_res, NULL);
+ if (status)
+ goto exit;
+
+ *counter_id = le16_to_cpu(buf->elem[0].e.sw_resp);
+
+exit:
+ kfree(buf);
+ return status;
+}
+
+/**
+ * ice_free_res_cntr - free resource counter
+ * @hw: pointer to the hardware structure
+ * @type: type of resource
+ * @alloc_shared: if set it is shared else dedicated
+ * @num_items: number of entries to be freed for FD resource type
+ * @counter_id: counter ID resource which needs to be freed
+ */
+int
+ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+ u16 counter_id)
+{
+ struct ice_aqc_alloc_free_res_elem *buf;
+ u16 buf_len;
+ int status;
+
+ /* Free resource */
+ buf_len = struct_size(buf, elem, 1);
+ buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->num_elems = cpu_to_le16(num_items);
+ buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) &
+ ICE_AQC_RES_TYPE_M) | alloc_shared);
+ buf->elem[0].e.sw_resp = cpu_to_le16(counter_id);
+
+ status = ice_aq_alloc_free_res(hw, 1, buf, buf_len,
+ ice_aqc_opc_free_res, NULL);
+ if (status)
+ ice_debug(hw, ICE_DBG_SW, "counter resource could not be freed\n");
+
+ kfree(buf);
+ return status;
+}
+
+/* This is a mapping table that maps every word within a given protocol
+ * structure to the real byte offset as per the specification of that
+ * protocol header.
+ * For example, the destination address in the Ethernet header is 3 words at
+ * byte offsets 0, 2, 4 in the actual packet header, and the source address
+ * is at offsets 6, 8, 10.
+ * IMPORTANT: every structure that is part of the "ice_prot_hdr" union must
+ * have a matching entry describing its fields. This table needs to be
+ * updated whenever a new structure is added to that union.
+ */
+static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = {
+ { ICE_MAC_OFOS, { 0, 2, 4, 6, 8, 10, 12 } },
+ { ICE_MAC_IL, { 0, 2, 4, 6, 8, 10, 12 } },
+ { ICE_ETYPE_OL, { 0 } },
+ { ICE_ETYPE_IL, { 0 } },
+ { ICE_VLAN_OFOS, { 2, 0 } },
+ { ICE_IPV4_OFOS, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } },
+ { ICE_IPV4_IL, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } },
+ { ICE_IPV6_OFOS, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24,
+ 26, 28, 30, 32, 34, 36, 38 } },
+ { ICE_IPV6_IL, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24,
+ 26, 28, 30, 32, 34, 36, 38 } },
+ { ICE_TCP_IL, { 0, 2 } },
+ { ICE_UDP_OF, { 0, 2 } },
+ { ICE_UDP_ILOS, { 0, 2 } },
+ { ICE_VXLAN, { 8, 10, 12, 14 } },
+ { ICE_GENEVE, { 8, 10, 12, 14 } },
+ { ICE_NVGRE, { 0, 2, 4, 6 } },
+ { ICE_GTP, { 8, 10, 12, 14, 16, 18, 20, 22 } },
+ { ICE_GTP_NO_PAY, { 8, 10, 12, 14 } },
+ { ICE_PPPOE, { 0, 2, 4, 6 } },
+ { ICE_L2TPV3, { 0, 2, 4, 6, 8, 10 } },
+ { ICE_VLAN_EX, { 2, 0 } },
+ { ICE_VLAN_IN, { 2, 0 } },
+};
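+
+/* Reading the table: the ICE_TCP_IL entry { 0, 2 } says that match word 0
+ * of the TCP header lives at byte offset 0 (source port) and match word 1
+ * at byte offset 2 (destination port).
+ */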
+
+static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = {
+ { ICE_MAC_OFOS, ICE_MAC_OFOS_HW },
+ { ICE_MAC_IL, ICE_MAC_IL_HW },
+ { ICE_ETYPE_OL, ICE_ETYPE_OL_HW },
+ { ICE_ETYPE_IL, ICE_ETYPE_IL_HW },
+ { ICE_VLAN_OFOS, ICE_VLAN_OL_HW },
+ { ICE_IPV4_OFOS, ICE_IPV4_OFOS_HW },
+ { ICE_IPV4_IL, ICE_IPV4_IL_HW },
+ { ICE_IPV6_OFOS, ICE_IPV6_OFOS_HW },
+ { ICE_IPV6_IL, ICE_IPV6_IL_HW },
+ { ICE_TCP_IL, ICE_TCP_IL_HW },
+ { ICE_UDP_OF, ICE_UDP_OF_HW },
+ { ICE_UDP_ILOS, ICE_UDP_ILOS_HW },
+ { ICE_VXLAN, ICE_UDP_OF_HW },
+ { ICE_GENEVE, ICE_UDP_OF_HW },
+ { ICE_NVGRE, ICE_GRE_OF_HW },
+ { ICE_GTP, ICE_UDP_OF_HW },
+ { ICE_GTP_NO_PAY, ICE_UDP_ILOS_HW },
+ { ICE_PPPOE, ICE_PPPOE_HW },
+ { ICE_L2TPV3, ICE_L2TPV3_HW },
+ { ICE_VLAN_EX, ICE_VLAN_OF_HW },
+ { ICE_VLAN_IN, ICE_VLAN_OL_HW },
+};
+
+/**
+ * ice_find_recp - find a recipe
+ * @hw: pointer to the hardware structure
+ * @lkup_exts: extension sequence to match
+ * @tun_type: type of recipe tunnel
+ *
+ * Returns index of matching recipe, or ICE_MAX_NUM_RECIPES if not found.
+ */
+static u16
+ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts,
+ enum ice_sw_tunnel_type tun_type)
+{
+ bool refresh_required = true;
+ struct ice_sw_recipe *recp;
+ u8 i;
+
+ /* Walk through existing recipes to find a match */
+ recp = hw->switch_info->recp_list;
+ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
+ /* If a recipe was not created for this ID in SW bookkeeping,
+ * check if FW has an entry for this recipe. If the FW has an
+ * entry, update it in our SW bookkeeping and continue with
+ * the matching.
+ */
+ if (!recp[i].recp_created)
+ if (ice_get_recp_frm_fw(hw,
+ hw->switch_info->recp_list, i,
+ &refresh_required))
+ continue;
+
+ /* Skip inverse action recipes */
+ if (recp[i].root_buf && recp[i].root_buf->content.act_ctrl &
+ ICE_AQ_RECIPE_ACT_INV_ACT)
+ continue;
+
+ /* if the number of words we are looking for matches */
+ if (lkup_exts->n_val_words == recp[i].lkup_exts.n_val_words) {
+ struct ice_fv_word *ar = recp[i].lkup_exts.fv_words;
+ struct ice_fv_word *be = lkup_exts->fv_words;
+ u16 *cr = recp[i].lkup_exts.field_mask;
+ u16 *de = lkup_exts->field_mask;
+ bool found = true;
+ u8 pe, qr;
+
+ /* ar, cr, and qr are related to the recipe words, while
+ * be, de, and pe are related to the lookup words
+ */
+ for (pe = 0; pe < lkup_exts->n_val_words; pe++) {
+ for (qr = 0; qr < recp[i].lkup_exts.n_val_words;
+ qr++) {
+ if (ar[qr].off == be[pe].off &&
+ ar[qr].prot_id == be[pe].prot_id &&
+ cr[qr] == de[pe])
+ /* Found the "pe"th word in the
+ * given recipe
+ */
+ break;
+ }
+ /* After walking through all the words in the
+ * "i"th recipe if "p"th word was not found then
+ * this recipe is not what we are looking for.
+ * So break out from this loop and try the next
+ * recipe
+ */
+ if (qr >= recp[i].lkup_exts.n_val_words) {
+ found = false;
+ break;
+ }
+ }
+ /* If for "i"th recipe the found was never set to false
+ * then it means we found our match
+ * Also tun type of recipe needs to be checked
+ */
+ if (found && recp[i].tun_type == tun_type)
+ return i; /* Return the recipe ID */
+ }
+ }
+ return ICE_MAX_NUM_RECIPES;
+}
+
+/**
+ * ice_change_proto_id_to_dvm - change proto id in prot_id_tbl
+ *
+ * The protocol ID for the outer VLAN differs between DVM and SVM. If DVM
+ * is supported, the protocol table record for the outer VLAN has to be
+ * modified to reflect the value proper for DVM.
+ */
+void ice_change_proto_id_to_dvm(void)
+{
+ u8 i;
+
+ for (i = 0; i < ARRAY_SIZE(ice_prot_id_tbl); i++)
+ if (ice_prot_id_tbl[i].type == ICE_VLAN_OFOS &&
+ ice_prot_id_tbl[i].protocol_id != ICE_VLAN_OF_HW)
+ ice_prot_id_tbl[i].protocol_id = ICE_VLAN_OF_HW;
+}
+
+/**
+ * ice_prot_type_to_id - get protocol ID from protocol type
+ * @type: protocol type
+ * @id: pointer to variable that will receive the ID
+ *
+ * Returns true if found, false otherwise
+ */
+static bool ice_prot_type_to_id(enum ice_protocol_type type, u8 *id)
+{
+ u8 i;
+
+ for (i = 0; i < ARRAY_SIZE(ice_prot_id_tbl); i++)
+ if (ice_prot_id_tbl[i].type == type) {
+ *id = ice_prot_id_tbl[i].protocol_id;
+ return true;
+ }
+ return false;
+}
+
+/**
+ * ice_fill_valid_words - count valid words
+ * @rule: advanced rule with lookup information
+ * @lkup_exts: byte offset extractions of the words that are valid
+ *
+ * calculate valid words in a lookup rule using mask value
+ */
+static u8
+ice_fill_valid_words(struct ice_adv_lkup_elem *rule,
+ struct ice_prot_lkup_ext *lkup_exts)
+{
+ u8 j, word, prot_id, ret_val;
+
+ if (!ice_prot_type_to_id(rule->type, &prot_id))
+ return 0;
+
+ word = lkup_exts->n_val_words;
+
+ for (j = 0; j < sizeof(rule->m_u) / sizeof(u16); j++)
+ if (((u16 *)&rule->m_u)[j] &&
+ rule->type < ARRAY_SIZE(ice_prot_ext)) {
+ /* No more space to accommodate */
+ if (word >= ICE_MAX_CHAIN_WORDS)
+ return 0;
+ lkup_exts->fv_words[word].off =
+ ice_prot_ext[rule->type].offs[j];
+ lkup_exts->fv_words[word].prot_id =
+ ice_prot_id_tbl[rule->type].protocol_id;
+ lkup_exts->field_mask[word] =
+ be16_to_cpu(((__force __be16 *)&rule->m_u)[j]);
+ word++;
+ }
+
+ ret_val = word - lkup_exts->n_val_words;
+ lkup_exts->n_val_words = word;
+
+ return ret_val;
+}
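+
+/* Illustrative example: an ICE_TCP_IL rule whose mask selects only m_u
+ * word 1 (the destination port) contributes a single valid word with
+ * prot_id ICE_TCP_IL_HW and off 2 (from ice_prot_ext), and the function
+ * returns 1.
+ */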
+
+/**
+ * ice_create_first_fit_recp_def - Create a recipe grouping
+ * @hw: pointer to the hardware structure
+ * @lkup_exts: an array of protocol header extractions
+ * @rg_list: pointer to a list that stores new recipe groups
+ * @recp_cnt: pointer to a variable that stores returned number of recipe groups
+ *
+ * Using first fit algorithm, take all the words that are still not done
+ * and start grouping them in 4-word groups. Each group makes up one
+ * recipe.
+ */
+static int
+ice_create_first_fit_recp_def(struct ice_hw *hw,
+ struct ice_prot_lkup_ext *lkup_exts,
+ struct list_head *rg_list,
+ u8 *recp_cnt)
+{
+ struct ice_pref_recipe_group *grp = NULL;
+ u8 j;
+
+ *recp_cnt = 0;
+
+ /* Walk through every word in the rule to check if it is not done. If so
+ * then this word needs to be part of a new recipe.
+ */
+ for (j = 0; j < lkup_exts->n_val_words; j++)
+ if (!test_bit(j, lkup_exts->done)) {
+ if (!grp ||
+ grp->n_val_pairs == ICE_NUM_WORDS_RECIPE) {
+ struct ice_recp_grp_entry *entry;
+
+ entry = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*entry),
+ GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+ list_add(&entry->l_entry, rg_list);
+ grp = &entry->r_group;
+ (*recp_cnt)++;
+ }
+
+ grp->pairs[grp->n_val_pairs].prot_id =
+ lkup_exts->fv_words[j].prot_id;
+ grp->pairs[grp->n_val_pairs].off =
+ lkup_exts->fv_words[j].off;
+ grp->mask[grp->n_val_pairs] = lkup_exts->field_mask[j];
+ grp->n_val_pairs++;
+ }
+
+ return 0;
+}
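+
+/* Illustrative example: six not-yet-done words produce two groups under
+ * the first-fit packing above, one full group of ICE_NUM_WORDS_RECIPE
+ * pairs and one with the remaining two words, so *recp_cnt is set to 2.
+ */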
+
+/**
+ * ice_fill_fv_word_index - fill in the field vector indices for a recipe group
+ * @hw: pointer to the hardware structure
+ * @fv_list: field vector with the extraction sequence information
+ * @rg_list: recipe groupings with protocol-offset pairs
+ *
+ * Helper function to fill in the field vector indices for protocol-offset
+ * pairs. These indexes are then ultimately programmed into a recipe.
+ */
+static int
+ice_fill_fv_word_index(struct ice_hw *hw, struct list_head *fv_list,
+ struct list_head *rg_list)
+{
+ struct ice_sw_fv_list_entry *fv;
+ struct ice_recp_grp_entry *rg;
+ struct ice_fv_word *fv_ext;
+
+ if (list_empty(fv_list))
+ return 0;
+
+ fv = list_first_entry(fv_list, struct ice_sw_fv_list_entry,
+ list_entry);
+ fv_ext = fv->fv_ptr->ew;
+
+ list_for_each_entry(rg, rg_list, l_entry) {
+ u8 i;
+
+ for (i = 0; i < rg->r_group.n_val_pairs; i++) {
+ struct ice_fv_word *pr;
+ bool found = false;
+ u16 mask;
+ u8 j;
+
+ pr = &rg->r_group.pairs[i];
+ mask = rg->r_group.mask[i];
+
+ for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++)
+ if (fv_ext[j].prot_id == pr->prot_id &&
+ fv_ext[j].off == pr->off) {
+ found = true;
+
+ /* Store index of field vector */
+ rg->fv_idx[i] = j;
+ rg->fv_mask[i] = mask;
+ break;
+ }
+
+ /* Protocol/offset could not be found, caller gave an
+ * invalid pair
+ */
+ if (!found)
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_find_free_recp_res_idx - find free result indexes for recipe
+ * @hw: pointer to hardware structure
+ * @profiles: bitmap of profiles that will be associated with the new recipe
+ * @free_idx: pointer to variable to receive the free index bitmap
+ *
+ * The algorithm used here is:
+ * 1. When creating a new recipe, create a set P which contains all
+ * Profiles that will be associated with our new recipe
+ *
+ * 2. For each Profile p in set P:
+ * a. Add all recipes associated with Profile p into set R
+ * b. Optional : PossibleIndexes &= profile[p].possibleIndexes
+ * [initially PossibleIndexes should be 0xFFFFFFFFFFFFFFFF]
+ * i. Or just assume they all have the same possible indexes:
+ * 44, 45, 46, 47
+ * i.e., PossibleIndexes = 0x0000F00000000000
+ *
+ * 3. For each Recipe r in set R:
+ * a. UsedIndexes |= (bitwise or ) recipe[r].res_indexes
+ * b. FreeIndexes = UsedIndexes ^ PossibleIndexes
+ *
+ * FreeIndexes will contain the bits indicating the indexes free for use,
+ * then the code needs to update the recipe[r].used_result_idx_bits to
+ * indicate which indexes were selected for use by this recipe.
+ */
+static u16
+ice_find_free_recp_res_idx(struct ice_hw *hw, const unsigned long *profiles,
+ unsigned long *free_idx)
+{
+ DECLARE_BITMAP(possible_idx, ICE_MAX_FV_WORDS);
+ DECLARE_BITMAP(recipes, ICE_MAX_NUM_RECIPES);
+ DECLARE_BITMAP(used_idx, ICE_MAX_FV_WORDS);
+ u16 bit;
+
+ bitmap_zero(recipes, ICE_MAX_NUM_RECIPES);
+ bitmap_zero(used_idx, ICE_MAX_FV_WORDS);
+
+ bitmap_fill(possible_idx, ICE_MAX_FV_WORDS);
+
+ /* For each profile we are going to associate the recipe with, add the
+ * recipes that are associated with that profile. This will give us
+ * the set of recipes that our recipe may collide with. Also, determine
+ * what possible result indexes are usable given this set of profiles.
+ */
+ for_each_set_bit(bit, profiles, ICE_MAX_NUM_PROFILES) {
+ bitmap_or(recipes, recipes, profile_to_recipe[bit],
+ ICE_MAX_NUM_RECIPES);
+ bitmap_and(possible_idx, possible_idx,
+ hw->switch_info->prof_res_bm[bit],
+ ICE_MAX_FV_WORDS);
+ }
+
+ /* For each recipe that our new recipe may collide with, determine
+ * which indexes have been used.
+ */
+ for_each_set_bit(bit, recipes, ICE_MAX_NUM_RECIPES)
+ bitmap_or(used_idx, used_idx,
+ hw->switch_info->recp_list[bit].res_idxs,
+ ICE_MAX_FV_WORDS);
+
+ bitmap_xor(free_idx, used_idx, possible_idx, ICE_MAX_FV_WORDS);
+
+ /* return number of free indexes */
+ return (u16)bitmap_weight(free_idx, ICE_MAX_FV_WORDS);
+}
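+
+/* Worked example using the values from the comment above: if
+ * PossibleIndexes = 0x0000F00000000000 (indexes 44-47) and one colliding
+ * recipe already uses index 44, UsedIndexes has only bit 44 set, the XOR
+ * leaves bits 45-47 set in free_idx, and the function returns 3.
+ */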
+
+/**
+ * ice_add_sw_recipe - issue AQ calls to create a switch recipe
+ * @hw: pointer to hardware structure
+ * @rm: recipe management list entry
+ * @profiles: bitmap of profiles that will be associated with this recipe
+ */
+static int
+ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
+ unsigned long *profiles)
+{
+ DECLARE_BITMAP(result_idx_bm, ICE_MAX_FV_WORDS);
+ struct ice_aqc_recipe_data_elem *tmp;
+ struct ice_aqc_recipe_data_elem *buf;
+ struct ice_recp_grp_entry *entry;
+ u16 free_res_idx;
+ u16 recipe_count;
+ u8 chain_idx;
+ u8 recps = 0;
+ int status;
+
+ /* When more than one recipe is required, another recipe is needed to
+ * chain them together. Matching a tunnel metadata ID takes up one of
+ * the match fields in the chaining recipe, reducing the number of
+ * chained recipes by one.
+ */
+ /* check number of free result indices */
+ bitmap_zero(result_idx_bm, ICE_MAX_FV_WORDS);
+ free_res_idx = ice_find_free_recp_res_idx(hw, profiles, result_idx_bm);
+
+ ice_debug(hw, ICE_DBG_SW, "Result idx slots: %d, need %d\n",
+ free_res_idx, rm->n_grp_count);
+
+ if (rm->n_grp_count > 1) {
+ if (rm->n_grp_count > free_res_idx)
+ return -ENOSPC;
+
+ rm->n_grp_count++;
+ }
+
+ if (rm->n_grp_count > ICE_MAX_CHAIN_RECIPE)
+ return -ENOSPC;
+
+ tmp = kcalloc(ICE_MAX_NUM_RECIPES, sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ buf = devm_kcalloc(ice_hw_to_dev(hw), rm->n_grp_count, sizeof(*buf),
+ GFP_KERNEL);
+ if (!buf) {
+ status = -ENOMEM;
+ goto err_mem;
+ }
+
+ bitmap_zero(rm->r_bitmap, ICE_MAX_NUM_RECIPES);
+ recipe_count = ICE_MAX_NUM_RECIPES;
+ status = ice_aq_get_recipe(hw, tmp, &recipe_count, ICE_SW_LKUP_MAC,
+ NULL);
+ if (status || recipe_count == 0)
+ goto err_unroll;
+
+ /* Allocate the recipe resources, and configure them according to the
+ * match fields from protocol headers and extracted field vectors.
+ */
+ chain_idx = find_first_bit(result_idx_bm, ICE_MAX_FV_WORDS);
+ list_for_each_entry(entry, &rm->rg_list, l_entry) {
+ u8 i;
+
+ status = ice_alloc_recipe(hw, &entry->rid);
+ if (status)
+ goto err_unroll;
+
+ /* Clear the result index of the located recipe, as this will be
+ * updated, if needed, later in the recipe creation process.
+ */
+ tmp[0].content.result_indx = 0;
+
+ buf[recps] = tmp[0];
+ buf[recps].recipe_indx = (u8)entry->rid;
+ /* if the recipe is a non-root recipe, RID should be programmed
+ * as 0 for the rules to be applied correctly.
+ */
+ buf[recps].content.rid = 0;
+ memset(&buf[recps].content.lkup_indx, 0,
+ sizeof(buf[recps].content.lkup_indx));
+
+ /* All recipes use look-up index 0 to match switch ID. */
+ buf[recps].content.lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX;
+ buf[recps].content.mask[0] =
+ cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK);
+ /* Setup lkup_indx 1..4 to INVALID/ignore and set the mask
+ * to be 0
+ */
+ for (i = 1; i <= ICE_NUM_WORDS_RECIPE; i++) {
+ buf[recps].content.lkup_indx[i] = 0x80;
+ buf[recps].content.mask[i] = 0;
+ }
+
+ for (i = 0; i < entry->r_group.n_val_pairs; i++) {
+ buf[recps].content.lkup_indx[i + 1] = entry->fv_idx[i];
+ buf[recps].content.mask[i + 1] =
+ cpu_to_le16(entry->fv_mask[i]);
+ }
+
+ if (rm->n_grp_count > 1) {
+ /* Checks to see if there really is a valid result index
+ * that can be used.
+ */
+ if (chain_idx >= ICE_MAX_FV_WORDS) {
+ ice_debug(hw, ICE_DBG_SW, "No chain index available\n");
+ status = -ENOSPC;
+ goto err_unroll;
+ }
+
+ entry->chain_idx = chain_idx;
+ buf[recps].content.result_indx =
+ ICE_AQ_RECIPE_RESULT_EN |
+ ((chain_idx << ICE_AQ_RECIPE_RESULT_DATA_S) &
+ ICE_AQ_RECIPE_RESULT_DATA_M);
+ clear_bit(chain_idx, result_idx_bm);
+ chain_idx = find_first_bit(result_idx_bm,
+ ICE_MAX_FV_WORDS);
+ }
+
+ /* fill recipe dependencies */
+ bitmap_zero((unsigned long *)buf[recps].recipe_bitmap,
+ ICE_MAX_NUM_RECIPES);
+ set_bit(buf[recps].recipe_indx,
+ (unsigned long *)buf[recps].recipe_bitmap);
+ buf[recps].content.act_ctrl_fwd_priority = rm->priority;
+ recps++;
+ }
+
+ if (rm->n_grp_count == 1) {
+ rm->root_rid = buf[0].recipe_indx;
+ set_bit(buf[0].recipe_indx, rm->r_bitmap);
+ buf[0].content.rid = rm->root_rid | ICE_AQ_RECIPE_ID_IS_ROOT;
+ if (sizeof(buf[0].recipe_bitmap) >= sizeof(rm->r_bitmap)) {
+ memcpy(buf[0].recipe_bitmap, rm->r_bitmap,
+ sizeof(buf[0].recipe_bitmap));
+ } else {
+ status = -EINVAL;
+ goto err_unroll;
+ }
+ /* Applicable only for ROOT_RECIPE: set the fwd_priority for
+ * the recipe being created, if specified by the user. Any
+ * advanced switch filter that results in a new extraction
+ * sequence usually ends up creating a new recipe of type ROOT,
+ * and recipes are usually associated with profiles. A switch
+ * rule referring to the newly created recipe needs to have
+ * either a 'fwd' or a 'join' priority, otherwise switch rule
+ * evaluation will not happen correctly. In other words, if a
+ * switch rule is to be evaluated on a priority basis, the
+ * recipe needs a priority; otherwise it will be evaluated last.
+ */
+ buf[0].content.act_ctrl_fwd_priority = rm->priority;
+ } else {
+ struct ice_recp_grp_entry *last_chain_entry;
+ u16 rid, i;
+
+ /* Allocate the last recipe that will chain the outcomes of the
+ * other recipes together
+ */
+ status = ice_alloc_recipe(hw, &rid);
+ if (status)
+ goto err_unroll;
+
+ buf[recps].recipe_indx = (u8)rid;
+ buf[recps].content.rid = (u8)rid;
+ buf[recps].content.rid |= ICE_AQ_RECIPE_ID_IS_ROOT;
+ /* the newly created entry should also be part of rg_list to
+ * make sure we have a complete recipe
+ */
+ last_chain_entry = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(*last_chain_entry),
+ GFP_KERNEL);
+ if (!last_chain_entry) {
+ status = -ENOMEM;
+ goto err_unroll;
+ }
+ last_chain_entry->rid = rid;
+ memset(&buf[recps].content.lkup_indx, 0,
+ sizeof(buf[recps].content.lkup_indx));
+ /* All recipes use look-up index 0 to match switch ID. */
+ buf[recps].content.lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX;
+ buf[recps].content.mask[0] =
+ cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK);
+ for (i = 1; i <= ICE_NUM_WORDS_RECIPE; i++) {
+ buf[recps].content.lkup_indx[i] =
+ ICE_AQ_RECIPE_LKUP_IGNORE;
+ buf[recps].content.mask[i] = 0;
+ }
+
+ i = 1;
+ /* update r_bitmap with the recp that is used for chaining */
+ set_bit(rid, rm->r_bitmap);
+ /* this is the recipe that chains all the other recipes, so it
+ * does not itself need a chaining index
+ */
+ last_chain_entry->chain_idx = ICE_INVAL_CHAIN_IND;
+ list_for_each_entry(entry, &rm->rg_list, l_entry) {
+ last_chain_entry->fv_idx[i] = entry->chain_idx;
+ buf[recps].content.lkup_indx[i] = entry->chain_idx;
+ buf[recps].content.mask[i++] = cpu_to_le16(0xFFFF);
+ set_bit(entry->rid, rm->r_bitmap);
+ }
+ list_add(&last_chain_entry->l_entry, &rm->rg_list);
+ if (sizeof(buf[recps].recipe_bitmap) >=
+ sizeof(rm->r_bitmap)) {
+ memcpy(buf[recps].recipe_bitmap, rm->r_bitmap,
+ sizeof(buf[recps].recipe_bitmap));
+ } else {
+ status = -EINVAL;
+ goto err_unroll;
+ }
+ buf[recps].content.act_ctrl_fwd_priority = rm->priority;
+
+ recps++;
+ rm->root_rid = (u8)rid;
+ }
+ status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
+ if (status)
+ goto err_unroll;
+
+ status = ice_aq_add_recipe(hw, buf, rm->n_grp_count, NULL);
+ ice_release_change_lock(hw);
+ if (status)
+ goto err_unroll;
+
+ /* Add every recipe that was just created to the recipe
+ * bookkeeping list
+ */
+ list_for_each_entry(entry, &rm->rg_list, l_entry) {
+ struct ice_switch_info *sw = hw->switch_info;
+ bool is_root, idx_found = false;
+ struct ice_sw_recipe *recp;
+ u16 idx, buf_idx = 0;
+
+ /* find buffer index for copying some data */
+ for (idx = 0; idx < rm->n_grp_count; idx++)
+ if (buf[idx].recipe_indx == entry->rid) {
+ buf_idx = idx;
+ idx_found = true;
+ }
+
+ if (!idx_found) {
+ status = -EIO;
+ goto err_unroll;
+ }
+
+ recp = &sw->recp_list[entry->rid];
+ is_root = (rm->root_rid == entry->rid);
+ recp->is_root = is_root;
+
+ recp->root_rid = entry->rid;
+ recp->big_recp = (is_root && rm->n_grp_count > 1);
+
+ memcpy(&recp->ext_words, entry->r_group.pairs,
+ entry->r_group.n_val_pairs * sizeof(struct ice_fv_word));
+
+ memcpy(recp->r_bitmap, buf[buf_idx].recipe_bitmap,
+ sizeof(recp->r_bitmap));
+
+ /* Copy non-result fv index values and masks to recipe. This
+ * call will also update the result recipe bitmask.
+ */
+ ice_collect_result_idx(&buf[buf_idx], recp);
+
+ /* for non-root recipes, also copy to the root, this allows
+ * easier matching of a complete chained recipe
+ */
+ if (!is_root)
+ ice_collect_result_idx(&buf[buf_idx],
+ &sw->recp_list[rm->root_rid]);
+
+ recp->n_ext_words = entry->r_group.n_val_pairs;
+ recp->chain_idx = entry->chain_idx;
+ recp->priority = buf[buf_idx].content.act_ctrl_fwd_priority;
+ recp->n_grp_count = rm->n_grp_count;
+ recp->tun_type = rm->tun_type;
+ recp->recp_created = true;
+ }
+ rm->root_buf = buf;
+ kfree(tmp);
+ return status;
+
+err_unroll:
+err_mem:
+ kfree(tmp);
+ devm_kfree(ice_hw_to_dev(hw), buf);
+ return status;
+}
+
+/**
+ * ice_create_recipe_group - creates recipe group
+ * @hw: pointer to hardware structure
+ * @rm: recipe management list entry
+ * @lkup_exts: lookup elements
+ */
+static int
+ice_create_recipe_group(struct ice_hw *hw, struct ice_sw_recipe *rm,
+ struct ice_prot_lkup_ext *lkup_exts)
+{
+ u8 recp_count = 0;
+ int status;
+
+ rm->n_grp_count = 0;
+
+ /* Create recipes for words that are marked not done by packing them
+ * as best fit.
+ */
+ status = ice_create_first_fit_recp_def(hw, lkup_exts,
+ &rm->rg_list, &recp_count);
+ if (!status) {
+ rm->n_grp_count += recp_count;
+ rm->n_ext_words = lkup_exts->n_val_words;
+ memcpy(&rm->ext_words, lkup_exts->fv_words,
+ sizeof(rm->ext_words));
+ memcpy(rm->word_masks, lkup_exts->field_mask,
+ sizeof(rm->word_masks));
+ }
+
+ return status;
+}
+
+/**
+ * ice_tun_type_match_word - determine if tun type needs a match mask
+ * @tun_type: tunnel type
+ * @mask: mask to be used for the tunnel
+ */
+static bool ice_tun_type_match_word(enum ice_sw_tunnel_type tun_type, u16 *mask)
+{
+ switch (tun_type) {
+ case ICE_SW_TUN_GENEVE:
+ case ICE_SW_TUN_VXLAN:
+ case ICE_SW_TUN_NVGRE:
+ case ICE_SW_TUN_GTPU:
+ case ICE_SW_TUN_GTPC:
+ *mask = ICE_TUN_FLAG_MASK;
+ return true;
+
+ default:
+ *mask = 0;
+ return false;
+ }
+}
+
+/**
+ * ice_add_special_words - Add words that are not protocols, such as metadata
+ * @rinfo: other information regarding the rule e.g. priority and action info
+ * @lkup_exts: lookup word structure
+ * @dvm_ena: is double VLAN mode enabled
+ */
+static int
+ice_add_special_words(struct ice_adv_rule_info *rinfo,
+ struct ice_prot_lkup_ext *lkup_exts, bool dvm_ena)
+{
+ u16 mask;
+
+ /* If this is a tunneled packet, then add recipe index to match the
+ * tunnel bit in the packet metadata flags.
+ */
+ if (ice_tun_type_match_word(rinfo->tun_type, &mask)) {
+ if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) {
+ u8 word = lkup_exts->n_val_words++;
+
+ lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW;
+ lkup_exts->fv_words[word].off = ICE_TUN_FLAG_MDID_OFF;
+ lkup_exts->field_mask[word] = mask;
+ } else {
+ return -ENOSPC;
+ }
+ }
+
+ if (rinfo->vlan_type != 0 && dvm_ena) {
+ if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) {
+ u8 word = lkup_exts->n_val_words++;
+
+ lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW;
+ lkup_exts->fv_words[word].off = ICE_VLAN_FLAG_MDID_OFF;
+ lkup_exts->field_mask[word] =
+ ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK;
+ } else {
+ return -ENOSPC;
+ }
+ }
+
+ return 0;
+}
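+
+/* Illustrative example: for a rule with tun_type ICE_SW_TUN_VXLAN, one
+ * metadata word is appended above with prot_id ICE_META_DATA_ID_HW, off
+ * ICE_TUN_FLAG_MDID_OFF and mask ICE_TUN_FLAG_MASK, so the tunnel bit in
+ * the packet metadata flags participates in the match.
+ */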
+
+/**
+ * ice_get_compat_fv_bitmap - Get compatible field vector bitmap for rule
+ * @hw: pointer to hardware structure
+ * @rinfo: other information regarding the rule e.g. priority and action info
+ * @bm: pointer to memory for returning the bitmap of field vectors
+ */
+static void
+ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo,
+ unsigned long *bm)
+{
+ enum ice_prof_type prof_type;
+
+ bitmap_zero(bm, ICE_MAX_NUM_PROFILES);
+
+ switch (rinfo->tun_type) {
+ case ICE_NON_TUN:
+ prof_type = ICE_PROF_NON_TUN;
+ break;
+ case ICE_ALL_TUNNELS:
+ prof_type = ICE_PROF_TUN_ALL;
+ break;
+ case ICE_SW_TUN_GENEVE:
+ case ICE_SW_TUN_VXLAN:
+ prof_type = ICE_PROF_TUN_UDP;
+ break;
+ case ICE_SW_TUN_NVGRE:
+ prof_type = ICE_PROF_TUN_GRE;
+ break;
+ case ICE_SW_TUN_GTPU:
+ prof_type = ICE_PROF_TUN_GTPU;
+ break;
+ case ICE_SW_TUN_GTPC:
+ prof_type = ICE_PROF_TUN_GTPC;
+ break;
+ case ICE_SW_TUN_AND_NON_TUN:
+ default:
+ prof_type = ICE_PROF_ALL;
+ break;
+ }
+
+ ice_get_sw_fv_bitmap(hw, prof_type, bm);
+}
+
+/**
+ * ice_add_adv_recipe - Add an advanced recipe that is not part of the default
+ * @hw: pointer to hardware structure
+ * @lkups: lookup elements or match criteria for the advanced recipe, one
+ * structure per protocol header
+ * @lkups_cnt: number of protocols
+ * @rinfo: other information regarding the rule e.g. priority and action info
+ * @rid: return the recipe ID of the recipe created
+ */
+static int
+ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+ u16 lkups_cnt, struct ice_adv_rule_info *rinfo, u16 *rid)
+{
+ DECLARE_BITMAP(fv_bitmap, ICE_MAX_NUM_PROFILES);
+ DECLARE_BITMAP(profiles, ICE_MAX_NUM_PROFILES);
+ struct ice_prot_lkup_ext *lkup_exts;
+ struct ice_recp_grp_entry *r_entry;
+ struct ice_sw_fv_list_entry *fvit;
+ struct ice_recp_grp_entry *r_tmp;
+ struct ice_sw_fv_list_entry *tmp;
+ struct ice_sw_recipe *rm;
+ int status = 0;
+ u8 i;
+
+ if (!lkups_cnt)
+ return -EINVAL;
+
+ lkup_exts = kzalloc(sizeof(*lkup_exts), GFP_KERNEL);
+ if (!lkup_exts)
+ return -ENOMEM;
+
+ /* Determine the number of words to be matched and if it exceeds a
+ * recipe's restrictions
+ */
+ for (i = 0; i < lkups_cnt; i++) {
+ u16 count;
+
+ if (lkups[i].type >= ICE_PROTOCOL_LAST) {
+ status = -EIO;
+ goto err_free_lkup_exts;
+ }
+
+ count = ice_fill_valid_words(&lkups[i], lkup_exts);
+ if (!count) {
+ status = -EIO;
+ goto err_free_lkup_exts;
+ }
+ }
+
+ rm = kzalloc(sizeof(*rm), GFP_KERNEL);
+ if (!rm) {
+ status = -ENOMEM;
+ goto err_free_lkup_exts;
+ }
+
+ /* Get field vectors that contain fields extracted from all the protocol
+ * headers being programmed.
+ */
+ INIT_LIST_HEAD(&rm->fv_list);
+ INIT_LIST_HEAD(&rm->rg_list);
+
+ /* Get bitmap of field vectors (profiles) that are compatible with the
+ * rule request; only these will be searched in the subsequent call to
+ * ice_get_sw_fv_list.
+ */
+ ice_get_compat_fv_bitmap(hw, rinfo, fv_bitmap);
+
+ status = ice_get_sw_fv_list(hw, lkup_exts, fv_bitmap, &rm->fv_list);
+ if (status)
+ goto err_unroll;
+
+ /* Create any special protocol/offset pairs, such as looking at tunnel
+ * bits by extracting metadata
+ */
+ status = ice_add_special_words(rinfo, lkup_exts, ice_is_dvm_ena(hw));
+ if (status)
+ goto err_unroll;
+
+ /* Group match words into recipes using preferred recipe grouping
+ * criteria.
+ */
+ status = ice_create_recipe_group(hw, rm, lkup_exts);
+ if (status)
+ goto err_unroll;
+
+ /* set the recipe priority if specified */
+ rm->priority = (u8)rinfo->priority;
+
+ /* Find offsets from the field vector. Pick the first one for all the
+ * recipes.
+ */
+ status = ice_fill_fv_word_index(hw, &rm->fv_list, &rm->rg_list);
+ if (status)
+ goto err_unroll;
+
+ /* get bitmap of all profiles the recipe will be associated with */
+ bitmap_zero(profiles, ICE_MAX_NUM_PROFILES);
+ list_for_each_entry(fvit, &rm->fv_list, list_entry) {
+ ice_debug(hw, ICE_DBG_SW, "profile: %d\n", fvit->profile_id);
+ set_bit((u16)fvit->profile_id, profiles);
+ }
+
+ /* Look for a recipe which matches our requested fv / mask list */
+ *rid = ice_find_recp(hw, lkup_exts, rinfo->tun_type);
+ if (*rid < ICE_MAX_NUM_RECIPES)
+ /* Success: we found a recipe that matches the existing criteria */
+ goto err_unroll;
+
+ rm->tun_type = rinfo->tun_type;
+ /* Recipe we need does not exist, add a recipe */
+ status = ice_add_sw_recipe(hw, rm, profiles);
+ if (status)
+ goto err_unroll;
+
+ /* Associate all the recipes created with all the profiles in the
+ * common field vector.
+ */
+ list_for_each_entry(fvit, &rm->fv_list, list_entry) {
+ DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+ u16 j;
+
+ status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id,
+ (u8 *)r_bitmap, NULL);
+ if (status)
+ goto err_unroll;
+
+ bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap,
+ ICE_MAX_NUM_RECIPES);
+ status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
+ if (status)
+ goto err_unroll;
+
+ status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id,
+ (u8 *)r_bitmap,
+ NULL);
+ ice_release_change_lock(hw);
+
+ if (status)
+ goto err_unroll;
+
+ /* Update profile to recipe bitmap array */
+ bitmap_copy(profile_to_recipe[fvit->profile_id], r_bitmap,
+ ICE_MAX_NUM_RECIPES);
+
+ /* Update recipe to profile bitmap array */
+ for_each_set_bit(j, rm->r_bitmap, ICE_MAX_NUM_RECIPES)
+ set_bit((u16)fvit->profile_id, recipe_to_profile[j]);
+ }
+
+ *rid = rm->root_rid;
+ memcpy(&hw->switch_info->recp_list[*rid].lkup_exts, lkup_exts,
+ sizeof(*lkup_exts));
+err_unroll:
+ list_for_each_entry_safe(r_entry, r_tmp, &rm->rg_list, l_entry) {
+ list_del(&r_entry->l_entry);
+ devm_kfree(ice_hw_to_dev(hw), r_entry);
+ }
+
+ list_for_each_entry_safe(fvit, tmp, &rm->fv_list, list_entry) {
+ list_del(&fvit->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), fvit);
+ }
+
+ if (rm->root_buf)
+ devm_kfree(ice_hw_to_dev(hw), rm->root_buf);
+
+ kfree(rm);
+
+err_free_lkup_exts:
+ kfree(lkup_exts);
+
+ return status;
+}
+
+/**
+ * ice_dummy_packet_add_vlan - insert VLAN header(s) into dummy packet
+ *
+ * @dummy_pkt: dummy packet profile pattern to which VLAN tag(s) will be added
+ * @num_vlan: number of VLAN tags
+ */
+static struct ice_dummy_pkt_profile *
+ice_dummy_packet_add_vlan(const struct ice_dummy_pkt_profile *dummy_pkt,
+ u32 num_vlan)
+{
+ struct ice_dummy_pkt_profile *profile;
+ struct ice_dummy_pkt_offsets *offsets;
+ u32 buf_len, off, etype_off, i;
+ u8 *pkt;
+
+ if (num_vlan < 1 || num_vlan > 2)
+ return ERR_PTR(-EINVAL);
+
+ off = num_vlan * VLAN_HLEN;
+
+ buf_len = array_size(num_vlan, sizeof(ice_dummy_vlan_packet_offsets)) +
+ dummy_pkt->offsets_len;
+ offsets = kzalloc(buf_len, GFP_KERNEL);
+ if (!offsets)
+ return ERR_PTR(-ENOMEM);
+
+ offsets[0] = dummy_pkt->offsets[0];
+ if (num_vlan == 2) {
+ offsets[1] = ice_dummy_qinq_packet_offsets[0];
+ offsets[2] = ice_dummy_qinq_packet_offsets[1];
+ } else if (num_vlan == 1) {
+ offsets[1] = ice_dummy_vlan_packet_offsets[0];
+ }
+
+ for (i = 1; dummy_pkt->offsets[i].type != ICE_PROTOCOL_LAST; i++) {
+ offsets[i + num_vlan].type = dummy_pkt->offsets[i].type;
+ offsets[i + num_vlan].offset =
+ dummy_pkt->offsets[i].offset + off;
+ }
+ offsets[i + num_vlan] = dummy_pkt->offsets[i];
+
+ etype_off = dummy_pkt->offsets[1].offset;
+
+ buf_len = array_size(num_vlan, sizeof(ice_dummy_vlan_packet)) +
+ dummy_pkt->pkt_len;
+ pkt = kzalloc(buf_len, GFP_KERNEL);
+ if (!pkt) {
+ kfree(offsets);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ memcpy(pkt, dummy_pkt->pkt, etype_off);
+ memcpy(pkt + etype_off,
+ num_vlan == 2 ? ice_dummy_qinq_packet : ice_dummy_vlan_packet,
+ off);
+ memcpy(pkt + etype_off + off, dummy_pkt->pkt + etype_off,
+ dummy_pkt->pkt_len - etype_off);
+
+ profile = kzalloc(sizeof(*profile), GFP_KERNEL);
+ if (!profile) {
+ kfree(offsets);
+ kfree(pkt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ profile->offsets = offsets;
+ profile->pkt = pkt;
+ profile->pkt_len = buf_len;
+ profile->match |= ICE_PKT_KMALLOC;
+
+ return profile;
+}
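+
+/* Illustrative example: with num_vlan == 1, a single VLAN_HLEN (4 byte)
+ * tag is spliced into the template at the EtherType offset, and every
+ * subsequent header offset in the copied offsets array is shifted by
+ * those 4 bytes.
+ */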
+
+/**
+ * ice_find_dummy_packet - find dummy packet
+ *
+ * @lkups: lookup elements or match criteria for the advanced recipe, one
+ * structure per protocol header
+ * @lkups_cnt: number of protocols
+ * @tun_type: tunnel type
+ *
+ * Returns the &ice_dummy_pkt_profile corresponding to these lookup params.
+ */
+static const struct ice_dummy_pkt_profile *
+ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
+ enum ice_sw_tunnel_type tun_type)
+{
+ const struct ice_dummy_pkt_profile *ret = ice_dummy_pkt_profiles;
+ u32 match = 0, vlan_count = 0;
+ u16 i;
+
+ switch (tun_type) {
+ case ICE_SW_TUN_GTPC:
+ match |= ICE_PKT_TUN_GTPC;
+ break;
+ case ICE_SW_TUN_GTPU:
+ match |= ICE_PKT_TUN_GTPU;
+ break;
+ case ICE_SW_TUN_NVGRE:
+ match |= ICE_PKT_TUN_NVGRE;
+ break;
+ case ICE_SW_TUN_GENEVE:
+ case ICE_SW_TUN_VXLAN:
+ match |= ICE_PKT_TUN_UDP;
+ break;
+ default:
+ break;
+ }
+
+ for (i = 0; i < lkups_cnt; i++) {
+ if (lkups[i].type == ICE_UDP_ILOS)
+ match |= ICE_PKT_INNER_UDP;
+ else if (lkups[i].type == ICE_TCP_IL)
+ match |= ICE_PKT_INNER_TCP;
+ else if (lkups[i].type == ICE_IPV6_OFOS)
+ match |= ICE_PKT_OUTER_IPV6;
+ else if (lkups[i].type == ICE_VLAN_OFOS ||
+ lkups[i].type == ICE_VLAN_EX)
+ vlan_count++;
+ else if (lkups[i].type == ICE_VLAN_IN)
+ vlan_count++;
+ else if (lkups[i].type == ICE_ETYPE_OL &&
+ lkups[i].h_u.ethertype.ethtype_id ==
+ cpu_to_be16(ICE_IPV6_ETHER_ID) &&
+ lkups[i].m_u.ethertype.ethtype_id ==
+ cpu_to_be16(0xFFFF))
+ match |= ICE_PKT_OUTER_IPV6;
+ else if (lkups[i].type == ICE_ETYPE_IL &&
+ lkups[i].h_u.ethertype.ethtype_id ==
+ cpu_to_be16(ICE_IPV6_ETHER_ID) &&
+ lkups[i].m_u.ethertype.ethtype_id ==
+ cpu_to_be16(0xFFFF))
+ match |= ICE_PKT_INNER_IPV6;
+ else if (lkups[i].type == ICE_IPV6_IL)
+ match |= ICE_PKT_INNER_IPV6;
+ else if (lkups[i].type == ICE_GTP_NO_PAY)
+ match |= ICE_PKT_GTP_NOPAY;
+ else if (lkups[i].type == ICE_PPPOE) {
+ match |= ICE_PKT_PPPOE;
+ if (lkups[i].h_u.pppoe_hdr.ppp_prot_id ==
+ htons(PPP_IPV6))
+ match |= ICE_PKT_OUTER_IPV6;
+ } else if (lkups[i].type == ICE_L2TPV3)
+ match |= ICE_PKT_L2TPV3;
+ }
+
+ while (ret->match && (match & ret->match) != ret->match)
+ ret++;
+
+ if (vlan_count != 0)
+ ret = ice_dummy_packet_add_vlan(ret, vlan_count);
+
+ return ret;
+}
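+
+/* Illustrative example: an ICE_SW_TUN_VXLAN rule that also carries an
+ * ICE_TCP_IL lookup computes match = ICE_PKT_TUN_UDP | ICE_PKT_INNER_TCP,
+ * and the scan above stops at the first profile whose match flags are all
+ * contained in that value.
+ */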
+
+/**
+ * ice_fill_adv_dummy_packet - fill a dummy packet with given match criteria
+ *
+ * @lkups: lookup elements or match criteria for the advanced recipe, one
+ * structure per protocol header
+ * @lkups_cnt: number of protocols
+ * @s_rule: stores rule information from the match criteria
+ * @profile: dummy packet profile (the template, its size and header offsets)
+ */
+static int
+ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
+ struct ice_sw_rule_lkup_rx_tx *s_rule,
+ const struct ice_dummy_pkt_profile *profile)
+{
+ u8 *pkt;
+ u16 i;
+
+ /* Start with a packet with a pre-defined/dummy content. Then, fill
+ * in the header values to be looked up or matched.
+ */
+ pkt = s_rule->hdr_data;
+
+ memcpy(pkt, profile->pkt, profile->pkt_len);
+
+ for (i = 0; i < lkups_cnt; i++) {
+ const struct ice_dummy_pkt_offsets *offsets = profile->offsets;
+ enum ice_protocol_type type;
+ u16 offset = 0, len = 0, j;
+ bool found = false;
+
+ /* find the start of this layer; it should be found since this
+ * was already checked when searching for the dummy packet
+ */
+ type = lkups[i].type;
+ for (j = 0; offsets[j].type != ICE_PROTOCOL_LAST; j++) {
+ if (type == offsets[j].type) {
+ offset = offsets[j].offset;
+ found = true;
+ break;
+ }
+ }
+ /* this should never happen in a correct calling sequence */
+ if (!found)
+ return -EINVAL;
+
+ switch (lkups[i].type) {
+ case ICE_MAC_OFOS:
+ case ICE_MAC_IL:
+ len = sizeof(struct ice_ether_hdr);
+ break;
+ case ICE_ETYPE_OL:
+ case ICE_ETYPE_IL:
+ len = sizeof(struct ice_ethtype_hdr);
+ break;
+ case ICE_VLAN_OFOS:
+ case ICE_VLAN_EX:
+ case ICE_VLAN_IN:
+ len = sizeof(struct ice_vlan_hdr);
+ break;
+ case ICE_IPV4_OFOS:
+ case ICE_IPV4_IL:
+ len = sizeof(struct ice_ipv4_hdr);
+ break;
+ case ICE_IPV6_OFOS:
+ case ICE_IPV6_IL:
+ len = sizeof(struct ice_ipv6_hdr);
+ break;
+ case ICE_TCP_IL:
+ case ICE_UDP_OF:
+ case ICE_UDP_ILOS:
+ len = sizeof(struct ice_l4_hdr);
+ break;
+ case ICE_SCTP_IL:
+ len = sizeof(struct ice_sctp_hdr);
+ break;
+ case ICE_NVGRE:
+ len = sizeof(struct ice_nvgre_hdr);
+ break;
+ case ICE_VXLAN:
+ case ICE_GENEVE:
+ len = sizeof(struct ice_udp_tnl_hdr);
+ break;
+ case ICE_GTP_NO_PAY:
+ case ICE_GTP:
+ len = sizeof(struct ice_udp_gtp_hdr);
+ break;
+ case ICE_PPPOE:
+ len = sizeof(struct ice_pppoe_hdr);
+ break;
+ case ICE_L2TPV3:
+ len = sizeof(struct ice_l2tpv3_sess_hdr);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* the length should be a word multiple */
+ if (len % ICE_BYTES_PER_WORD)
+ return -EIO;
+
+ /* We have the offset to the header start, the length, the
+ * caller's header values and mask. Use this information to
+ * copy the data into the dummy packet appropriately based on
+ * the mask. Note that we need to only write the bits as
+ * indicated by the mask to make sure we don't improperly write
+ * over any significant packet data.
+ */
+ for (j = 0; j < len / sizeof(u16); j++) {
+ u16 *ptr = (u16 *)(pkt + offset);
+ u16 mask = lkups[i].m_raw[j];
+
+ if (!mask)
+ continue;
+
+ ptr[j] = (ptr[j] & ~mask) | (lkups[i].h_raw[j] & mask);
+ }
+ }
+
+ s_rule->hdr_len = cpu_to_le16(profile->pkt_len);
+
+ return 0;
+}
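+
+/* Illustrative example of the masked write above: only the bits set in a
+ * lookup's 16-bit mask word are overwritten with the caller's header
+ * value; template bits outside the mask are preserved, so partial-field
+ * matches do not clobber neighbouring packet data.
+ */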
+
+/**
+ * ice_fill_adv_packet_tun - fill dummy packet with udp tunnel port
+ * @hw: pointer to the hardware structure
+ * @tun_type: tunnel type
+ * @pkt: dummy packet to fill in
+ * @offsets: offset info for the dummy packet
+ */
+static int
+ice_fill_adv_packet_tun(struct ice_hw *hw, enum ice_sw_tunnel_type tun_type,
+ u8 *pkt, const struct ice_dummy_pkt_offsets *offsets)
+{
+ u16 open_port, i;
+
+ switch (tun_type) {
+ case ICE_SW_TUN_VXLAN:
+ if (!ice_get_open_tunnel_port(hw, &open_port, TNL_VXLAN))
+ return -EIO;
+ break;
+ case ICE_SW_TUN_GENEVE:
+ if (!ice_get_open_tunnel_port(hw, &open_port, TNL_GENEVE))
+ return -EIO;
+ break;
+ default:
+ /* Nothing needs to be done for this tunnel type */
+ return 0;
+ }
+
+ /* Find the outer UDP protocol header and insert the port number */
+ for (i = 0; offsets[i].type != ICE_PROTOCOL_LAST; i++) {
+ if (offsets[i].type == ICE_UDP_OF) {
+ struct ice_l4_hdr *hdr;
+ u16 offset;
+
+ offset = offsets[i].offset;
+ hdr = (struct ice_l4_hdr *)&pkt[offset];
+ hdr->dst_port = cpu_to_be16(open_port);
+
+ return 0;
+ }
+ }
+
+ return -EIO;
+}
+
+/**
+ * ice_fill_adv_packet_vlan - fill dummy packet with VLAN tag type
+ * @vlan_type: VLAN tag type
+ * @pkt: dummy packet to fill in
+ * @offsets: offset info for the dummy packet
+ */
+static int
+ice_fill_adv_packet_vlan(u16 vlan_type, u8 *pkt,
+ const struct ice_dummy_pkt_offsets *offsets)
+{
+ u16 i;
+
+ /* Find VLAN header and insert VLAN TPID */
+ for (i = 0; offsets[i].type != ICE_PROTOCOL_LAST; i++) {
+ if (offsets[i].type == ICE_VLAN_OFOS ||
+ offsets[i].type == ICE_VLAN_EX) {
+ struct ice_vlan_hdr *hdr;
+ u16 offset;
+
+ offset = offsets[i].offset;
+ hdr = (struct ice_vlan_hdr *)&pkt[offset];
+ hdr->type = cpu_to_be16(vlan_type);
+
+ return 0;
+ }
+ }
+
+ return -EIO;
+}
+
+/**
+ * ice_find_adv_rule_entry - Search a rule entry
+ * @hw: pointer to the hardware structure
+ * @lkups: lookup elements or match criteria for the advanced recipe, one
+ * structure per protocol header
+ * @lkups_cnt: number of protocols
+ * @recp_id: recipe ID for which we are finding the rule
+ * @rinfo: other information regarding the rule e.g. priority and action info
+ *
+ * Helper function to search for a given advanced rule entry.
+ * Returns a pointer to the entry storing the rule if found.
+ */
+static struct ice_adv_fltr_mgmt_list_entry *
+ice_find_adv_rule_entry(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+ u16 lkups_cnt, u16 recp_id,
+ struct ice_adv_rule_info *rinfo)
+{
+ struct ice_adv_fltr_mgmt_list_entry *list_itr;
+ struct ice_switch_info *sw = hw->switch_info;
+ int i;
+
+ list_for_each_entry(list_itr, &sw->recp_list[recp_id].filt_rules,
+ list_entry) {
+ bool lkups_matched = true;
+
+ if (lkups_cnt != list_itr->lkups_cnt)
+ continue;
+ for (i = 0; i < list_itr->lkups_cnt; i++)
+ if (memcmp(&list_itr->lkups[i], &lkups[i],
+ sizeof(*lkups))) {
+ lkups_matched = false;
+ break;
+ }
+ if (rinfo->sw_act.flag == list_itr->rule_info.sw_act.flag &&
+ rinfo->tun_type == list_itr->rule_info.tun_type &&
+ rinfo->vlan_type == list_itr->rule_info.vlan_type &&
+ lkups_matched)
+ return list_itr;
+ }
+ return NULL;
+}
+
+/**
+ * ice_adv_add_update_vsi_list
+ * @hw: pointer to the hardware structure
+ * @m_entry: pointer to current adv filter management list entry
+ * @cur_fltr: filter information from the book keeping entry
+ * @new_fltr: filter information with the new VSI to be added
+ *
+ * Call AQ command to add or update previously created VSI list with new VSI.
+ *
+ * Helper function to do the bookkeeping associated with adding filter
+ * information. The bookkeeping algorithm is described below:
+ * When a VSI needs to subscribe to a given advanced filter
+ * if only one VSI has been added till now
+ * Allocate a new VSI list and add two VSIs
+ * to this list using switch rule command
+ * Update the previously created switch rule with the
+ * newly created VSI list ID
+ * if a VSI list was previously created
+ * Add the new VSI to the previously created VSI list set
+ * using the update switch rule command
+ */
+static int
+ice_adv_add_update_vsi_list(struct ice_hw *hw,
+ struct ice_adv_fltr_mgmt_list_entry *m_entry,
+ struct ice_adv_rule_info *cur_fltr,
+ struct ice_adv_rule_info *new_fltr)
+{
+ u16 vsi_list_id = 0;
+ int status;
+
+ if (cur_fltr->sw_act.fltr_act == ICE_FWD_TO_Q ||
+ cur_fltr->sw_act.fltr_act == ICE_FWD_TO_QGRP ||
+ cur_fltr->sw_act.fltr_act == ICE_DROP_PACKET)
+ return -EOPNOTSUPP;
+
+ if ((new_fltr->sw_act.fltr_act == ICE_FWD_TO_Q ||
+ new_fltr->sw_act.fltr_act == ICE_FWD_TO_QGRP) &&
+ (cur_fltr->sw_act.fltr_act == ICE_FWD_TO_VSI ||
+ cur_fltr->sw_act.fltr_act == ICE_FWD_TO_VSI_LIST))
+ return -EOPNOTSUPP;
+
+ if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) {
+ /* Only one entry existed in the mapping and it was not already
+ * a part of a VSI list. So, create a VSI list with the old and
+ * new VSIs.
+ */
+ struct ice_fltr_info tmp_fltr;
+ u16 vsi_handle_arr[2];
+
+ /* A rule already exists with the new VSI being added */
+ if (cur_fltr->sw_act.fwd_id.hw_vsi_id ==
+ new_fltr->sw_act.fwd_id.hw_vsi_id)
+ return -EEXIST;
+
+ vsi_handle_arr[0] = cur_fltr->sw_act.vsi_handle;
+ vsi_handle_arr[1] = new_fltr->sw_act.vsi_handle;
+ status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2,
+ &vsi_list_id,
+ ICE_SW_LKUP_LAST);
+ if (status)
+ return status;
+
+ memset(&tmp_fltr, 0, sizeof(tmp_fltr));
+ tmp_fltr.flag = m_entry->rule_info.sw_act.flag;
+ tmp_fltr.fltr_rule_id = cur_fltr->fltr_rule_id;
+ tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+ tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+ tmp_fltr.lkup_type = ICE_SW_LKUP_LAST;
+
+ /* Update the previous switch rule of "forward to VSI" to
+ * "fwd to VSI list"
+ */
+ status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+ if (status)
+ return status;
+
+ cur_fltr->sw_act.fwd_id.vsi_list_id = vsi_list_id;
+ cur_fltr->sw_act.fltr_act = ICE_FWD_TO_VSI_LIST;
+ m_entry->vsi_list_info =
+ ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
+ vsi_list_id);
+ } else {
+ u16 vsi_handle = new_fltr->sw_act.vsi_handle;
+
+ if (!m_entry->vsi_list_info)
+ return -EIO;
+
+ /* A rule already exists with the new VSI being added */
+ if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
+ return 0;
+
+ /* Update the previously created VSI list set with
+ * the new VSI ID passed in
+ */
+ vsi_list_id = cur_fltr->sw_act.fwd_id.vsi_list_id;
+
+ status = ice_update_vsi_list_rule(hw, &vsi_handle, 1,
+ vsi_list_id, false,
+ ice_aqc_opc_update_sw_rules,
+ ICE_SW_LKUP_LAST);
+ /* update VSI list mapping info with new VSI ID */
+ if (!status)
+ set_bit(vsi_handle, m_entry->vsi_list_info->vsi_map);
+ }
+ if (!status)
+ m_entry->vsi_count++;
+ return status;
+}
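+
+/* Editorial sketch (not part of the upstream code): assume VSI handle 3
+ * already subscribes through a direct ICE_FWD_TO_VSI rule and VSI handle 5
+ * now subscribes to the same filter. The first branch above effectively
+ * performs:
+ *
+ *	vsi_handle_arr[0] = 3;	// existing subscriber
+ *	vsi_handle_arr[1] = 5;	// new subscriber
+ *	ice_create_vsi_list_rule(hw, vsi_handle_arr, 2, &vsi_list_id,
+ *				 ICE_SW_LKUP_LAST);
+ *	// then the original rule is rewritten to ICE_FWD_TO_VSI_LIST
+ *
+ * Any further subscriber only needs ice_update_vsi_list_rule() on the
+ * already-created list (the else branch).
+ */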
+
+/**
+ * ice_add_adv_rule - helper function to create an advanced switch rule
+ * @hw: pointer to the hardware structure
+ * @lkups: information on the words that need to be looked up. All words
+ * together make one recipe
+ * @lkups_cnt: number of entries in the lkups array
+ * @rinfo: other information related to the rule that needs to be programmed
+ * @added_entry: this will return recipe_id, rule_id and vsi_handle. Should be
+ * ignored in case of error.
+ *
+ * This function can program only 1 rule at a time. The lkups array is used
+ * to describe all the words that form the "lookup" portion of the recipe.
+ * These words can span multiple protocols. Callers to this function need to
+ * pass in a list of protocol headers with lookup information along with a
+ * mask that determines which words of a given protocol header are valid.
+ * rinfo describes other information related to this rule such as forwarding
+ * IDs, priority of this rule, etc.
+ */
+int
+ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+ u16 lkups_cnt, struct ice_adv_rule_info *rinfo,
+ struct ice_rule_query_data *added_entry)
+{
+ struct ice_adv_fltr_mgmt_list_entry *m_entry, *adv_fltr = NULL;
+ struct ice_sw_rule_lkup_rx_tx *s_rule = NULL;
+ const struct ice_dummy_pkt_profile *profile;
+ u16 rid = 0, i, rule_buf_sz, vsi_handle;
+ struct list_head *rule_head;
+ struct ice_switch_info *sw;
+ u16 word_cnt;
+ u32 act = 0;
+ int status;
+ u8 q_rgn;
+
+ /* Initialize profile to result index bitmap */
+ if (!hw->switch_info->prof_res_bm_init) {
+ hw->switch_info->prof_res_bm_init = 1;
+ ice_init_prof_result_bm(hw);
+ }
+
+ if (!lkups_cnt)
+ return -EINVAL;
+
+ /* get # of words we need to match */
+ word_cnt = 0;
+ for (i = 0; i < lkups_cnt; i++) {
+ u16 j;
+
+ for (j = 0; j < ARRAY_SIZE(lkups->m_raw); j++)
+ if (lkups[i].m_raw[j])
+ word_cnt++;
+ }
+
+ if (!word_cnt)
+ return -EINVAL;
+
+ if (word_cnt > ICE_MAX_CHAIN_WORDS)
+ return -ENOSPC;
+
+ /* locate a dummy packet */
+ profile = ice_find_dummy_packet(lkups, lkups_cnt, rinfo->tun_type);
+ if (IS_ERR(profile))
+ return PTR_ERR(profile);
+
+ if (!(rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI ||
+ rinfo->sw_act.fltr_act == ICE_FWD_TO_Q ||
+ rinfo->sw_act.fltr_act == ICE_FWD_TO_QGRP ||
+ rinfo->sw_act.fltr_act == ICE_DROP_PACKET)) {
+ status = -EIO;
+ goto free_pkt_profile;
+ }
+
+ vsi_handle = rinfo->sw_act.vsi_handle;
+ if (!ice_is_vsi_valid(hw, vsi_handle)) {
+ status = -EINVAL;
+ goto free_pkt_profile;
+ }
+
+ if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI)
+ rinfo->sw_act.fwd_id.hw_vsi_id =
+ ice_get_hw_vsi_num(hw, vsi_handle);
+ if (rinfo->sw_act.flag & ICE_FLTR_TX)
+ rinfo->sw_act.src = ice_get_hw_vsi_num(hw, vsi_handle);
+
+ status = ice_add_adv_recipe(hw, lkups, lkups_cnt, rinfo, &rid);
+ if (status)
+ goto free_pkt_profile;
+ m_entry = ice_find_adv_rule_entry(hw, lkups, lkups_cnt, rid, rinfo);
+ if (m_entry) {
+ /* The rule already exists. If it does not yet apply to the VSI
+ * being added, either add that VSI to the rule's existing VSI
+ * list or, if no list exists yet, create one holding both the
+ * existing and the new VSI; vsi_count is incremented on success.
+ */
+ status = ice_adv_add_update_vsi_list(hw, m_entry,
+ &m_entry->rule_info,
+ rinfo);
+ if (added_entry) {
+ added_entry->rid = rid;
+ added_entry->rule_id = m_entry->rule_info.fltr_rule_id;
+ added_entry->vsi_handle = rinfo->sw_act.vsi_handle;
+ }
+ goto free_pkt_profile;
+ }
+ rule_buf_sz = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule, profile->pkt_len);
+ s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
+ if (!s_rule) {
+ status = -ENOMEM;
+ goto free_pkt_profile;
+ }
+ if (!rinfo->flags_info.act_valid) {
+ act |= ICE_SINGLE_ACT_LAN_ENABLE;
+ act |= ICE_SINGLE_ACT_LB_ENABLE;
+ } else {
+ act |= rinfo->flags_info.act & (ICE_SINGLE_ACT_LAN_ENABLE |
+ ICE_SINGLE_ACT_LB_ENABLE);
+ }
+
+ switch (rinfo->sw_act.fltr_act) {
+ case ICE_FWD_TO_VSI:
+ act |= (rinfo->sw_act.fwd_id.hw_vsi_id <<
+ ICE_SINGLE_ACT_VSI_ID_S) & ICE_SINGLE_ACT_VSI_ID_M;
+ act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT;
+ break;
+ case ICE_FWD_TO_Q:
+ act |= ICE_SINGLE_ACT_TO_Q;
+ act |= (rinfo->sw_act.fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+ ICE_SINGLE_ACT_Q_INDEX_M;
+ break;
+ case ICE_FWD_TO_QGRP:
+ q_rgn = rinfo->sw_act.qgrp_size > 0 ?
+ (u8)ilog2(rinfo->sw_act.qgrp_size) : 0;
+ act |= ICE_SINGLE_ACT_TO_Q;
+ act |= (rinfo->sw_act.fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+ ICE_SINGLE_ACT_Q_INDEX_M;
+ act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) &
+ ICE_SINGLE_ACT_Q_REGION_M;
+ break;
+ case ICE_DROP_PACKET:
+ act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP |
+ ICE_SINGLE_ACT_VALID_BIT;
+ break;
+ default:
+ status = -EIO;
+ goto err_ice_add_adv_rule;
+ }
+
+ /* set the rule LOOKUP type based on the caller-specified 'rx' flag
+ * instead of hardcoding it to be either LOOKUP_TX or LOOKUP_RX
+ *
+ * for 'Rx' set the source to be the port number
+ * for 'Tx' set the source to be the source HW VSI number (determined
+ * by the caller)
+ */
+ if (rinfo->rx) {
+ s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
+ s_rule->src = cpu_to_le16(hw->port_info->lport);
+ } else {
+ s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
+ s_rule->src = cpu_to_le16(rinfo->sw_act.src);
+ }
+
+ s_rule->recipe_id = cpu_to_le16(rid);
+ s_rule->act = cpu_to_le32(act);
+
+ status = ice_fill_adv_dummy_packet(lkups, lkups_cnt, s_rule, profile);
+ if (status)
+ goto err_ice_add_adv_rule;
+
+ if (rinfo->tun_type != ICE_NON_TUN &&
+ rinfo->tun_type != ICE_SW_TUN_AND_NON_TUN) {
+ status = ice_fill_adv_packet_tun(hw, rinfo->tun_type,
+ s_rule->hdr_data,
+ profile->offsets);
+ if (status)
+ goto err_ice_add_adv_rule;
+ }
+
+ if (rinfo->vlan_type != 0 && ice_is_dvm_ena(hw)) {
+ status = ice_fill_adv_packet_vlan(rinfo->vlan_type,
+ s_rule->hdr_data,
+ profile->offsets);
+ if (status)
+ goto err_ice_add_adv_rule;
+ }
+
+ status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule,
+ rule_buf_sz, 1, ice_aqc_opc_add_sw_rules,
+ NULL);
+ if (status)
+ goto err_ice_add_adv_rule;
+ adv_fltr = devm_kzalloc(ice_hw_to_dev(hw),
+ sizeof(struct ice_adv_fltr_mgmt_list_entry),
+ GFP_KERNEL);
+ if (!adv_fltr) {
+ status = -ENOMEM;
+ goto err_ice_add_adv_rule;
+ }
+
+ adv_fltr->lkups = devm_kmemdup(ice_hw_to_dev(hw), lkups,
+ lkups_cnt * sizeof(*lkups), GFP_KERNEL);
+ if (!adv_fltr->lkups) {
+ status = -ENOMEM;
+ goto err_ice_add_adv_rule;
+ }
+
+ adv_fltr->lkups_cnt = lkups_cnt;
+ adv_fltr->rule_info = *rinfo;
+ adv_fltr->rule_info.fltr_rule_id = le16_to_cpu(s_rule->index);
+ sw = hw->switch_info;
+ sw->recp_list[rid].adv_rule = true;
+ rule_head = &sw->recp_list[rid].filt_rules;
+
+ if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI)
+ adv_fltr->vsi_count = 1;
+
+ /* Add rule entry to bookkeeping list */
+ list_add(&adv_fltr->list_entry, rule_head);
+ if (added_entry) {
+ added_entry->rid = rid;
+ added_entry->rule_id = adv_fltr->rule_info.fltr_rule_id;
+ added_entry->vsi_handle = rinfo->sw_act.vsi_handle;
+ }
+err_ice_add_adv_rule:
+ if (status && adv_fltr) {
+ devm_kfree(ice_hw_to_dev(hw), adv_fltr->lkups);
+ devm_kfree(ice_hw_to_dev(hw), adv_fltr);
+ }
+
+ kfree(s_rule);
+
+free_pkt_profile:
+ if (profile->match & ICE_PKT_KMALLOC) {
+ kfree(profile->offsets);
+ kfree(profile->pkt);
+ kfree(profile);
+ }
+
+ return status;
+}
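+
+/* Illustrative usage (editorial sketch, not part of the upstream code): a
+ * caller that wants to drop Rx packets matching a single outer destination
+ * MAC lookup could build the rule as below; hw, vsi_handle and dst_mac are
+ * assumed to be valid caller-provided values.
+ *
+ *	struct ice_adv_lkup_elem lkup = { .type = ICE_MAC_OFOS };
+ *	struct ice_adv_rule_info rinfo = { 0 };
+ *	struct ice_rule_query_data added;
+ *	int err;
+ *
+ *	ether_addr_copy(lkup.h_u.eth_hdr.dst_addr, dst_mac);
+ *	eth_broadcast_addr(lkup.m_u.eth_hdr.dst_addr);	// match all 48 bits
+ *	rinfo.sw_act.fltr_act = ICE_DROP_PACKET;
+ *	rinfo.sw_act.vsi_handle = vsi_handle;
+ *	rinfo.rx = true;
+ *	err = ice_add_adv_rule(hw, &lkup, 1, &rinfo, &added);
+ */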
+
+/**
+ * ice_replay_vsi_fltr - Replay filters for requested VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ * @recp_id: Recipe ID for which rules need to be replayed
+ * @list_head: list for which filters need to be replayed
+ *
+ * Replays the filters of recipe recp_id for the VSI represented by
+ * vsi_handle. A valid VSI handle must be passed.
+ */
+static int
+ice_replay_vsi_fltr(struct ice_hw *hw, u16 vsi_handle, u8 recp_id,
+ struct list_head *list_head)
+{
+ struct ice_fltr_mgmt_list_entry *itr;
+ int status = 0;
+ u16 hw_vsi_id;
+
+ if (list_empty(list_head))
+ return status;
+ hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+ list_for_each_entry(itr, list_head, list_entry) {
+ struct ice_fltr_list_entry f_entry;
+
+ f_entry.fltr_info = itr->fltr_info;
+ if (itr->vsi_count < 2 && recp_id != ICE_SW_LKUP_VLAN &&
+ itr->fltr_info.vsi_handle == vsi_handle) {
+ /* update the src in case it is VSI num */
+ if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI)
+ f_entry.fltr_info.src = hw_vsi_id;
+ status = ice_add_rule_internal(hw, recp_id, &f_entry);
+ if (status)
+ goto end;
+ continue;
+ }
+ if (!itr->vsi_list_info ||
+ !test_bit(vsi_handle, itr->vsi_list_info->vsi_map))
+ continue;
+ /* Clearing it so that the logic can add it back */
+ clear_bit(vsi_handle, itr->vsi_list_info->vsi_map);
+ f_entry.fltr_info.vsi_handle = vsi_handle;
+ f_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ /* update the src in case it is VSI num */
+ if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI)
+ f_entry.fltr_info.src = hw_vsi_id;
+ if (recp_id == ICE_SW_LKUP_VLAN)
+ status = ice_add_vlan_internal(hw, &f_entry);
+ else
+ status = ice_add_rule_internal(hw, recp_id, &f_entry);
+ if (status)
+ goto end;
+ }
+end:
+ return status;
+}
+
+/**
+ * ice_adv_rem_update_vsi_list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle of the VSI to remove
+ * @fm_list: filter management entry for which the VSI list management needs to
+ * be done
+ */
+static int
+ice_adv_rem_update_vsi_list(struct ice_hw *hw, u16 vsi_handle,
+ struct ice_adv_fltr_mgmt_list_entry *fm_list)
+{
+ struct ice_vsi_list_map_info *vsi_list_info;
+ enum ice_sw_lkup_type lkup_type;
+ u16 vsi_list_id;
+ int status;
+
+ if (fm_list->rule_info.sw_act.fltr_act != ICE_FWD_TO_VSI_LIST ||
+ fm_list->vsi_count == 0)
+ return -EINVAL;
+
+ /* The VSI being removed is not subscribed to this rule */
+ if (!test_bit(vsi_handle, fm_list->vsi_list_info->vsi_map))
+ return -ENOENT;
+
+ lkup_type = ICE_SW_LKUP_LAST;
+ vsi_list_id = fm_list->rule_info.sw_act.fwd_id.vsi_list_id;
+ status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, vsi_list_id, true,
+ ice_aqc_opc_update_sw_rules,
+ lkup_type);
+ if (status)
+ return status;
+
+ fm_list->vsi_count--;
+ clear_bit(vsi_handle, fm_list->vsi_list_info->vsi_map);
+ vsi_list_info = fm_list->vsi_list_info;
+ if (fm_list->vsi_count == 1) {
+ struct ice_fltr_info tmp_fltr;
+ u16 rem_vsi_handle;
+
+ rem_vsi_handle = find_first_bit(vsi_list_info->vsi_map,
+ ICE_MAX_VSI);
+ if (!ice_is_vsi_valid(hw, rem_vsi_handle))
+ return -EIO;
+
+ /* Make sure VSI list is empty before removing it below */
+ status = ice_update_vsi_list_rule(hw, &rem_vsi_handle, 1,
+ vsi_list_id, true,
+ ice_aqc_opc_update_sw_rules,
+ lkup_type);
+ if (status)
+ return status;
+
+ memset(&tmp_fltr, 0, sizeof(tmp_fltr));
+ tmp_fltr.flag = fm_list->rule_info.sw_act.flag;
+ tmp_fltr.fltr_rule_id = fm_list->rule_info.fltr_rule_id;
+ fm_list->rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI;
+ tmp_fltr.fltr_act = ICE_FWD_TO_VSI;
+ tmp_fltr.fwd_id.hw_vsi_id =
+ ice_get_hw_vsi_num(hw, rem_vsi_handle);
+ fm_list->rule_info.sw_act.fwd_id.hw_vsi_id =
+ ice_get_hw_vsi_num(hw, rem_vsi_handle);
+ fm_list->rule_info.sw_act.vsi_handle = rem_vsi_handle;
+
+ /* Update the previous switch rule of "MAC fwd to VSI list" to
+ * "MAC forward to VSI", since only one VSI remains on the list
+ */
+ status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SW, "Failed to update pkt fwd rule to FWD_TO_VSI on HW VSI %d, error %d\n",
+ tmp_fltr.fwd_id.hw_vsi_id, status);
+ return status;
+ }
+ fm_list->vsi_list_info->ref_cnt--;
+
+ /* Remove the VSI list since it is no longer used */
+ status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SW, "Failed to remove VSI list %d, error %d\n",
+ vsi_list_id, status);
+ return status;
+ }
+
+ list_del(&vsi_list_info->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), vsi_list_info);
+ fm_list->vsi_list_info = NULL;
+ }
+
+ return status;
+}
+
+/**
+ * ice_rem_adv_rule - removes existing advanced switch rule
+ * @hw: pointer to the hardware structure
+ * @lkups: information on the words that need to be looked up. All words
+ * together make one recipe
+ * @lkups_cnt: number of entries in the lkups array
+ * @rinfo: pointer to the rule information for the rule to be removed
+ *
+ * This function can be used to remove 1 rule at a time. The lkups array is
+ * used to describe all the words that form the "lookup" portion of the
+ * rule. These words can span multiple protocols. Callers to this function
+ * need to pass in a list of protocol headers with lookup information along
+ * with a mask that determines which words of a given protocol header are
+ * valid. rinfo describes other information related to this rule such as
+ * forwarding IDs, priority of this rule, etc.
+ */
+static int
+ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+ u16 lkups_cnt, struct ice_adv_rule_info *rinfo)
+{
+ struct ice_adv_fltr_mgmt_list_entry *list_elem;
+ struct ice_prot_lkup_ext lkup_exts;
+ bool remove_rule = false;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ u16 i, rid, vsi_handle;
+ int status = 0;
+
+ memset(&lkup_exts, 0, sizeof(lkup_exts));
+ for (i = 0; i < lkups_cnt; i++) {
+ u16 count;
+
+ if (lkups[i].type >= ICE_PROTOCOL_LAST)
+ return -EIO;
+
+ count = ice_fill_valid_words(&lkups[i], &lkup_exts);
+ if (!count)
+ return -EIO;
+ }
+
+ /* Create any special protocol/offset pairs, such as looking at tunnel
+ * bits by extracting metadata
+ */
+ status = ice_add_special_words(rinfo, &lkup_exts, ice_is_dvm_ena(hw));
+ if (status)
+ return status;
+
+ rid = ice_find_recp(hw, &lkup_exts, rinfo->tun_type);
+ /* If we did not find a recipe that matches the existing criteria */
+ if (rid == ICE_MAX_NUM_RECIPES)
+ return -EINVAL;
+
+ rule_lock = &hw->switch_info->recp_list[rid].filt_rule_lock;
+ list_elem = ice_find_adv_rule_entry(hw, lkups, lkups_cnt, rid, rinfo);
+ /* the rule is already removed */
+ if (!list_elem)
+ return 0;
+ mutex_lock(rule_lock);
+ if (list_elem->rule_info.sw_act.fltr_act != ICE_FWD_TO_VSI_LIST) {
+ remove_rule = true;
+ } else if (list_elem->vsi_count > 1) {
+ remove_rule = false;
+ vsi_handle = rinfo->sw_act.vsi_handle;
+ status = ice_adv_rem_update_vsi_list(hw, vsi_handle, list_elem);
+ } else {
+ vsi_handle = rinfo->sw_act.vsi_handle;
+ status = ice_adv_rem_update_vsi_list(hw, vsi_handle, list_elem);
+ if (status) {
+ mutex_unlock(rule_lock);
+ return status;
+ }
+ if (list_elem->vsi_count == 0)
+ remove_rule = true;
+ }
+ mutex_unlock(rule_lock);
+ if (remove_rule) {
+ struct ice_sw_rule_lkup_rx_tx *s_rule;
+ u16 rule_buf_sz;
+
+ rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s_rule);
+ s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
+ if (!s_rule)
+ return -ENOMEM;
+ s_rule->act = 0;
+ s_rule->index = cpu_to_le16(list_elem->rule_info.fltr_rule_id);
+ s_rule->hdr_len = 0;
+ status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule,
+ rule_buf_sz, 1,
+ ice_aqc_opc_remove_sw_rules, NULL);
+ if (!status || status == -ENOENT) {
+ struct ice_switch_info *sw = hw->switch_info;
+
+ mutex_lock(rule_lock);
+ list_del(&list_elem->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), list_elem->lkups);
+ devm_kfree(ice_hw_to_dev(hw), list_elem);
+ mutex_unlock(rule_lock);
+ if (list_empty(&sw->recp_list[rid].filt_rules))
+ sw->recp_list[rid].adv_rule = false;
+ }
+ kfree(s_rule);
+ }
+ return status;
+}
+
+/**
+ * ice_rem_adv_rule_by_id - removes existing advanced switch rule by ID
+ * @hw: pointer to the hardware structure
+ * @remove_entry: data struct which holds rule_id, VSI handle and recipe ID
+ *
+ * This function is used to remove 1 rule at a time. The removal is based on
+ * the remove_entry parameter. This function removes the rule for a given
+ * vsi_handle with a given rule_id, both of which are passed in remove_entry.
+ */
+int
+ice_rem_adv_rule_by_id(struct ice_hw *hw,
+ struct ice_rule_query_data *remove_entry)
+{
+ struct ice_adv_fltr_mgmt_list_entry *list_itr;
+ struct list_head *list_head;
+ struct ice_adv_rule_info rinfo;
+ struct ice_switch_info *sw;
+
+ sw = hw->switch_info;
+ if (!sw->recp_list[remove_entry->rid].recp_created)
+ return -EINVAL;
+ list_head = &sw->recp_list[remove_entry->rid].filt_rules;
+ list_for_each_entry(list_itr, list_head, list_entry) {
+ if (list_itr->rule_info.fltr_rule_id ==
+ remove_entry->rule_id) {
+ rinfo = list_itr->rule_info;
+ rinfo.sw_act.vsi_handle = remove_entry->vsi_handle;
+ return ice_rem_adv_rule(hw, list_itr->lkups,
+ list_itr->lkups_cnt, &rinfo);
+ }
+ }
+ /* either list is empty or unable to find rule */
+ return -ENOENT;
+}
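+
+/* Editorial sketch (not upstream code): the ice_rule_query_data filled in by
+ * ice_add_adv_rule() is exactly what this function consumes later, e.g.:
+ *
+ *	struct ice_rule_query_data entry;
+ *
+ *	err = ice_add_adv_rule(hw, lkups, lkups_cnt, &rinfo, &entry);
+ *	...
+ *	err = ice_rem_adv_rule_by_id(hw, &entry);
+ */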
+
+/**
+ * ice_rem_adv_rule_for_vsi - removes existing advanced switch rules for a
+ * given VSI handle
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle for which we are supposed to remove all the rules.
+ *
+ * This function removes all the rules for a given VSI. As soon as removing
+ * a rule fails, it returns immediately with that error code; otherwise it
+ * returns success.
+ */
+int ice_rem_adv_rule_for_vsi(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_adv_fltr_mgmt_list_entry *list_itr, *tmp_entry;
+ struct ice_vsi_list_map_info *map_info;
+ struct ice_adv_rule_info rinfo;
+ struct list_head *list_head;
+ struct ice_switch_info *sw;
+ int status;
+ u8 rid;
+
+ sw = hw->switch_info;
+ for (rid = 0; rid < ICE_MAX_NUM_RECIPES; rid++) {
+ if (!sw->recp_list[rid].recp_created)
+ continue;
+ if (!sw->recp_list[rid].adv_rule)
+ continue;
+
+ list_head = &sw->recp_list[rid].filt_rules;
+ list_for_each_entry_safe(list_itr, tmp_entry, list_head,
+ list_entry) {
+ rinfo = list_itr->rule_info;
+
+ if (rinfo.sw_act.fltr_act == ICE_FWD_TO_VSI_LIST) {
+ map_info = list_itr->vsi_list_info;
+ if (!map_info)
+ continue;
+
+ if (!test_bit(vsi_handle, map_info->vsi_map))
+ continue;
+ } else if (rinfo.sw_act.vsi_handle != vsi_handle) {
+ continue;
+ }
+
+ rinfo.sw_act.vsi_handle = vsi_handle;
+ status = ice_rem_adv_rule(hw, list_itr->lkups,
+ list_itr->lkups_cnt, &rinfo);
+ if (status)
+ return status;
+ }
+ }
+ return 0;
+}
+
+/**
+ * ice_replay_vsi_adv_rule - Replay advanced rule for requested VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ * @list_head: list for which filters need to be replayed
+ *
+ * Replay the advanced rule for the given VSI.
+ */
+static int
+ice_replay_vsi_adv_rule(struct ice_hw *hw, u16 vsi_handle,
+ struct list_head *list_head)
+{
+ struct ice_rule_query_data added_entry = { 0 };
+ struct ice_adv_fltr_mgmt_list_entry *adv_fltr;
+ int status = 0;
+
+ if (list_empty(list_head))
+ return status;
+ list_for_each_entry(adv_fltr, list_head, list_entry) {
+ struct ice_adv_rule_info *rinfo = &adv_fltr->rule_info;
+ u16 lk_cnt = adv_fltr->lkups_cnt;
+
+ if (vsi_handle != rinfo->sw_act.vsi_handle)
+ continue;
+ status = ice_add_adv_rule(hw, adv_fltr->lkups, lk_cnt, rinfo,
+ &added_entry);
+ if (status)
+ break;
+ }
+ return status;
+}
+
+/**
+ * ice_replay_vsi_all_fltr - replay all filters stored in bookkeeping lists
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ *
+ * Replays filters for requested VSI via vsi_handle.
+ */
+int ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ int status;
+ u8 i;
+
+ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
+ struct list_head *head;
+
+ head = &sw->recp_list[i].filt_replay_rules;
+ if (!sw->recp_list[i].adv_rule)
+ status = ice_replay_vsi_fltr(hw, vsi_handle, i, head);
+ else
+ status = ice_replay_vsi_adv_rule(hw, vsi_handle, head);
+ if (status)
+ return status;
+ }
+ return status;
+}
+
+/**
+ * ice_rm_all_sw_replay_rule_info - deletes filter replay rules
+ * @hw: pointer to the HW struct
+ *
+ * Deletes the filter replay rules.
+ */
+void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw)
+{
+ struct ice_switch_info *sw = hw->switch_info;
+ u8 i;
+
+ if (!sw)
+ return;
+
+ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
+ if (!list_empty(&sw->recp_list[i].filt_replay_rules)) {
+ struct list_head *l_head;
+
+ l_head = &sw->recp_list[i].filt_replay_rules;
+ if (!sw->recp_list[i].adv_rule)
+ ice_rem_sw_rule_info(hw, l_head);
+ else
+ ice_rem_adv_rule_info(hw, l_head);
+ }
+ }
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
new file mode 100644
index 000000000..68d8e8a6a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -0,0 +1,397 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SWITCH_H_
+#define _ICE_SWITCH_H_
+
+#include "ice_common.h"
+
+#define ICE_SW_CFG_MAX_BUF_LEN 2048
+#define ICE_DFLT_VSI_INVAL 0xff
+#define ICE_FLTR_RX BIT(0)
+#define ICE_FLTR_TX BIT(1)
+#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX)
+#define ICE_VSI_INVAL_ID 0xffff
+#define ICE_INVAL_Q_HANDLE 0xFFFF
+
+/* Switch Profile IDs for Profile related switch rules */
+#define ICE_PROFID_IPV4_GTPC_TEID 41
+#define ICE_PROFID_IPV4_GTPC_NO_TEID 42
+#define ICE_PROFID_IPV4_GTPU_TEID 43
+#define ICE_PROFID_IPV6_GTPC_TEID 44
+#define ICE_PROFID_IPV6_GTPC_NO_TEID 45
+#define ICE_PROFID_IPV6_GTPU_TEID 46
+#define ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER 70
+
+/* VSI context structure for add/get/update/free operations */
+struct ice_vsi_ctx {
+ u16 vsi_num;
+ u16 vsis_allocd;
+ u16 vsis_unallocated;
+ u16 flags;
+ struct ice_aqc_vsi_props info;
+ struct ice_sched_vsi_info sched;
+ u8 alloc_from_pool;
+ u8 vf_num;
+ u16 num_lan_q_entries[ICE_MAX_TRAFFIC_CLASS];
+ struct ice_q_ctx *lan_q_ctx[ICE_MAX_TRAFFIC_CLASS];
+ u16 num_rdma_q_entries[ICE_MAX_TRAFFIC_CLASS];
+ struct ice_q_ctx *rdma_q_ctx[ICE_MAX_TRAFFIC_CLASS];
+};
+
+/* Switch recipe ID enum values are specific to hardware */
+enum ice_sw_lkup_type {
+ ICE_SW_LKUP_ETHERTYPE = 0,
+ ICE_SW_LKUP_MAC = 1,
+ ICE_SW_LKUP_MAC_VLAN = 2,
+ ICE_SW_LKUP_PROMISC = 3,
+ ICE_SW_LKUP_VLAN = 4,
+ ICE_SW_LKUP_DFLT = 5,
+ ICE_SW_LKUP_ETHERTYPE_MAC = 8,
+ ICE_SW_LKUP_PROMISC_VLAN = 9,
+ ICE_SW_LKUP_LAST
+};
+
+/* type of filter src ID */
+enum ice_src_id {
+ ICE_SRC_ID_UNKNOWN = 0,
+ ICE_SRC_ID_VSI,
+ ICE_SRC_ID_QUEUE,
+ ICE_SRC_ID_LPORT,
+};
+
+struct ice_fltr_info {
+ /* Look up information: how to look up packet */
+ enum ice_sw_lkup_type lkup_type;
+ /* Forward action: filter action to do after lookup */
+ enum ice_sw_fwd_act_type fltr_act;
+ /* rule ID returned by firmware once filter rule is created */
+ u16 fltr_rule_id;
+ u16 flag;
+
+ /* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */
+ u16 src;
+ enum ice_src_id src_id;
+
+ union {
+ struct {
+ u8 mac_addr[ETH_ALEN];
+ } mac;
+ struct {
+ u8 mac_addr[ETH_ALEN];
+ u16 vlan_id;
+ } mac_vlan;
+ struct {
+ u16 vlan_id;
+ u16 tpid;
+ u8 tpid_valid;
+ } vlan;
+ /* Set lkup_type as ICE_SW_LKUP_ETHERTYPE
+ * if just using ethertype as filter. Set lkup_type as
+ * ICE_SW_LKUP_ETHERTYPE_MAC if MAC also needs to be
+ * passed in as filter.
+ */
+ struct {
+ u16 ethertype;
+ u8 mac_addr[ETH_ALEN]; /* optional */
+ } ethertype_mac;
+ } l_data; /* Zero out the memory of l_data before using it, or set
+ * only the fields associated with the lookup match;
+ * everything else must remain zero
+ */
+
+ /* Depending on filter action */
+ union {
+ /* queue ID in case of ICE_FWD_TO_Q and starting
+ * queue ID in case of ICE_FWD_TO_QGRP.
+ */
+ u16 q_id:11;
+ u16 hw_vsi_id:10;
+ u16 vsi_list_id:10;
+ } fwd_id;
+
+ /* Sw VSI handle */
+ u16 vsi_handle;
+
+ /* Set to num_queues if action is ICE_FWD_TO_QGRP. This field
+ * determines the range of queues the packet needs to be forwarded to.
+ * Note that qgrp_size must be set to a power of 2.
+ */
+ u8 qgrp_size;
+
+ /* Rule creations populate these indicators based on the switch type */
+ u8 lb_en; /* Indicate if packet can be looped back */
+ u8 lan_en; /* Indicate if packet can be forwarded to the uplink */
+};
+
+struct ice_update_recipe_lkup_idx_params {
+ u16 rid;
+ u16 fv_idx;
+ bool ignore_valid;
+ u16 mask;
+ bool mask_valid;
+ u8 lkup_idx;
+};
+
+struct ice_adv_lkup_elem {
+ enum ice_protocol_type type;
+ union {
+ union ice_prot_hdr h_u; /* Header values */
+ /* Used to iterate over the headers */
+ u16 h_raw[sizeof(union ice_prot_hdr) / sizeof(u16)];
+ };
+ union {
+ union ice_prot_hdr m_u; /* Mask of header values to match */
+ /* Used to iterate over header mask */
+ u16 m_raw[sizeof(union ice_prot_hdr) / sizeof(u16)];
+ };
+};
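+
+/* Editorial sketch (not upstream text): h_u holds the header values to match
+ * and m_u the corresponding mask; only bits set in m_u are compared. A
+ * hypothetical inner TCP destination-port match could be built as:
+ *
+ *	struct ice_adv_lkup_elem lkup = { .type = ICE_TCP_IL };
+ *
+ *	lkup.h_u.l4_hdr.dst_port = cpu_to_be16(443);
+ *	lkup.m_u.l4_hdr.dst_port = cpu_to_be16(0xFFFF);
+ */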
+
+struct ice_sw_act_ctrl {
+ /* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */
+ u16 src;
+ u16 flag;
+ enum ice_sw_fwd_act_type fltr_act;
+ /* Depending on filter action */
+ union {
+ /* This is a queue ID in case of ICE_FWD_TO_Q and starting
+ * queue ID in case of ICE_FWD_TO_QGRP.
+ */
+ u16 q_id:11;
+ u16 vsi_id:10;
+ u16 hw_vsi_id:10;
+ u16 vsi_list_id:10;
+ } fwd_id;
+ /* software VSI handle */
+ u16 vsi_handle;
+ u8 qgrp_size;
+};
+
+struct ice_rule_query_data {
+ /* Recipe ID for which the requested rule was added */
+ u16 rid;
+ /* Rule ID that was added or is supposed to be removed */
+ u16 rule_id;
+ /* vsi_handle for which Rule was added or is supposed to be removed */
+ u16 vsi_handle;
+};
+
+/* This structure allows passing info about the lb_en and lan_en
+ * flags to ice_add_adv_rule. Values in act are used
+ * only if act_valid is set to true; otherwise default
+ * values are used.
+ */
+struct ice_adv_rule_flags_info {
+ u32 act;
+ u8 act_valid; /* indicate if flags in act are valid */
+};
+
+struct ice_adv_rule_info {
+ enum ice_sw_tunnel_type tun_type;
+ struct ice_sw_act_ctrl sw_act;
+ u32 priority;
+ u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */
+ u16 fltr_rule_id;
+ u16 vlan_type;
+ struct ice_adv_rule_flags_info flags_info;
+};
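+
+/* Editorial sketch (not upstream text): a minimal rule_info for an Rx rule
+ * forwarding to a VSI, assuming vsi_handle is valid; all other fields keep
+ * their zero defaults.
+ *
+ *	struct ice_adv_rule_info rinfo = {
+ *		.sw_act = {
+ *			.fltr_act = ICE_FWD_TO_VSI,
+ *			.vsi_handle = vsi_handle,
+ *		},
+ *		.rx = true,
+ *	};
+ */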
+
+/* A collection of one or more four-word recipes */
+struct ice_sw_recipe {
+ /* For a chained recipe the root recipe is what should be used for
+ * programming rules
+ */
+ u8 is_root;
+ u8 root_rid;
+ u8 recp_created;
+
+ /* Number of extraction words */
+ u8 n_ext_words;
+ /* Protocol ID and Offset pair (extraction word) to describe the
+ * recipe
+ */
+ struct ice_fv_word ext_words[ICE_MAX_CHAIN_WORDS];
+ u16 word_masks[ICE_MAX_CHAIN_WORDS];
+
+ /* if this recipe is a collection of other recipes */
+ u8 big_recp;
+
+ /* if this recipe is part of another bigger recipe, then the chain
+ * index corresponding to this recipe
+ */
+ u8 chain_idx;
+
+ /* if this recipe is a collection of other recipes, then the count
+ * of those recipes; their IDs are tracked in r_bitmap below
+ */
+ u8 n_grp_count;
+
+ /* Bit map specifying the IDs associated with this group of recipes */
+ DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+
+ enum ice_sw_tunnel_type tun_type;
+
+ /* List of type ice_fltr_mgmt_list_entry or adv_rule */
+ u8 adv_rule;
+ struct list_head filt_rules;
+ struct list_head filt_replay_rules;
+
+ struct mutex filt_rule_lock; /* protect filter rule structure */
+
+ /* Profiles this recipe should be associated with */
+ struct list_head fv_list;
+
+ /* Profiles this recipe is associated with */
+ u8 num_profs, *prof_ids;
+
+ /* Bit map for possible result indexes */
+ DECLARE_BITMAP(res_idxs, ICE_MAX_FV_WORDS);
+
+ /* This allows the user to specify the recipe priority.
+ * For now, this becomes 'fwd_priority' when the recipe
+ * is created; recipes can usually have 'fwd' and 'join'
+ * priority.
+ */
+ u8 priority;
+
+ struct list_head rg_list;
+
+ /* AQ buffer associated with this recipe */
+ struct ice_aqc_recipe_data_elem *root_buf;
+ /* This struct saves the fv_words for a given lookup */
+ struct ice_prot_lkup_ext lkup_exts;
+};
+
+/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list ID */
+struct ice_vsi_list_map_info {
+ struct list_head list_entry;
+ DECLARE_BITMAP(vsi_map, ICE_MAX_VSI);
+ u16 vsi_list_id;
+ /* counter to track how many rules are reusing this VSI list */
+ u16 ref_cnt;
+};
+
+struct ice_fltr_list_entry {
+ struct list_head list_entry;
+ int status;
+ struct ice_fltr_info fltr_info;
+};
+
+/* This defines an entry in the list that maintains MAC or VLAN membership
+ * to HW list mapping, since multiple VSIs can subscribe to the same MAC or
+ * VLAN. As an optimization the VSI list should be created only when a
+ * second VSI becomes a subscriber to the same MAC address. VSI lists are always
+ * used for VLAN membership.
+ */
+struct ice_fltr_mgmt_list_entry {
+ /* back pointer to VSI list ID to VSI list mapping */
+ struct ice_vsi_list_map_info *vsi_list_info;
+ u16 vsi_count;
+#define ICE_INVAL_LG_ACT_INDEX 0xffff
+ u16 lg_act_idx;
+#define ICE_INVAL_SW_MARKER_ID 0xffff
+ u16 sw_marker_id;
+ struct list_head list_entry;
+ struct ice_fltr_info fltr_info;
+#define ICE_INVAL_COUNTER_ID 0xff
+ u8 counter_index;
+};
+
+struct ice_adv_fltr_mgmt_list_entry {
+ struct list_head list_entry;
+
+ struct ice_adv_lkup_elem *lkups;
+ struct ice_adv_rule_info rule_info;
+ u16 lkups_cnt;
+ struct ice_vsi_list_map_info *vsi_list_info;
+ u16 vsi_count;
+};
+
+enum ice_promisc_flags {
+ ICE_PROMISC_UCAST_RX = 0x1,
+ ICE_PROMISC_UCAST_TX = 0x2,
+ ICE_PROMISC_MCAST_RX = 0x4,
+ ICE_PROMISC_MCAST_TX = 0x8,
+ ICE_PROMISC_BCAST_RX = 0x10,
+ ICE_PROMISC_BCAST_TX = 0x20,
+ ICE_PROMISC_VLAN_RX = 0x40,
+ ICE_PROMISC_VLAN_TX = 0x80,
+};
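+
+/* Editorial note: these are bit flags and are meant to be OR-ed together,
+ * e.g. a hypothetical "all Rx promiscuous" mask:
+ *
+ *	u8 promisc_mask = ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX |
+ *			  ICE_PROMISC_BCAST_RX;
+ */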
+
+/* VSI related commands */
+int
+ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+ struct ice_sq_cd *cd);
+int
+ice_free_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+ bool keep_vsi_alloc, struct ice_sq_cd *cd);
+int
+ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+ struct ice_sq_cd *cd);
+bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle);
+struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle);
+void ice_clear_all_vsi_ctx(struct ice_hw *hw);
+/* Switch config */
+int ice_get_initial_sw_cfg(struct ice_hw *hw);
+
+int
+ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+ u16 *counter_id);
+int
+ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+ u16 counter_id);
+
+/* Switch/bridge related commands */
+int
+ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+ u16 lkups_cnt, struct ice_adv_rule_info *rinfo,
+ struct ice_rule_query_data *added_entry);
+int ice_update_sw_rule_bridge_mode(struct ice_hw *hw);
+int ice_add_vlan(struct ice_hw *hw, struct list_head *m_list);
+int ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list);
+int ice_add_mac(struct ice_hw *hw, struct list_head *m_lst);
+int ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst);
+bool ice_mac_fltr_exist(struct ice_hw *hw, u8 *mac, u16 vsi_handle);
+bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle);
+int ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list);
+int ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list);
+int ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable);
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle);
+
+/* Promisc/defport setup for VSIs */
+int
+ice_cfg_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle, bool set,
+ u8 direction);
+bool
+ice_check_if_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle,
+ bool *rule_exists);
+
+int
+ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+ u16 vid);
+int
+ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+ u16 vid);
+int
+ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+ bool rm_vlan_promisc);
+
+int ice_rem_adv_rule_for_vsi(struct ice_hw *hw, u16 vsi_handle);
+int
+ice_rem_adv_rule_by_id(struct ice_hw *hw,
+ struct ice_rule_query_data *remove_entry);
+
+int ice_init_def_sw_recp(struct ice_hw *hw);
+u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle);
+
+int ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle);
+void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw);
+
+int
+ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz,
+ u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd);
+int
+ice_update_recipe_lkup_idx(struct ice_hw *hw,
+ struct ice_update_recipe_lkup_idx_params *params);
+void ice_change_proto_id_to_dvm(void);
+#endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
new file mode 100644
index 000000000..652ef09ee
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -0,0 +1,1823 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_tc_lib.h"
+#include "ice_fltr.h"
+#include "ice_lib.h"
+#include "ice_protocol_type.h"
+
+/**
+ * ice_tc_count_lkups - determine lookup count for switch filter
+ * @flags: TC-flower flags
+ * @headers: Pointer to TC flower filter header structure
+ * @fltr: Pointer to outer TC filter structure
+ *
+ * Determine lookup count based on TC flower input for switch filter.
+ */
+static int
+ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
+ struct ice_tc_flower_fltr *fltr)
+{
+ int lkups_cnt = 0;
+
+ if (flags & ICE_TC_FLWR_FIELD_TENANT_ID)
+ lkups_cnt++;
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_DST_MAC)
+ lkups_cnt++;
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_OPTS)
+ lkups_cnt++;
+
+ if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 |
+ ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 |
+ ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+ ICE_TC_FLWR_FIELD_ENC_DEST_IPV6))
+ lkups_cnt++;
+
+ if (flags & (ICE_TC_FLWR_FIELD_ENC_IP_TOS |
+ ICE_TC_FLWR_FIELD_ENC_IP_TTL))
+ lkups_cnt++;
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT)
+ lkups_cnt++;
+
+ if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID)
+ lkups_cnt++;
+
+ /* are MAC fields specified? */
+ if (flags & (ICE_TC_FLWR_FIELD_DST_MAC | ICE_TC_FLWR_FIELD_SRC_MAC))
+ lkups_cnt++;
+
+ /* is VLAN specified? */
+ if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO))
+ lkups_cnt++;
+
+ /* is CVLAN specified? */
+ if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO))
+ lkups_cnt++;
+
+ /* are PPPoE options specified? */
+ if (flags & (ICE_TC_FLWR_FIELD_PPPOE_SESSID |
+ ICE_TC_FLWR_FIELD_PPP_PROTO))
+ lkups_cnt++;
+
+ /* are IPv[4|6] fields specified? */
+ if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | ICE_TC_FLWR_FIELD_SRC_IPV4 |
+ ICE_TC_FLWR_FIELD_DEST_IPV6 | ICE_TC_FLWR_FIELD_SRC_IPV6))
+ lkups_cnt++;
+
+ if (flags & (ICE_TC_FLWR_FIELD_IP_TOS | ICE_TC_FLWR_FIELD_IP_TTL))
+ lkups_cnt++;
+
+ /* are L2TPv3 options specified? */
+ if (flags & ICE_TC_FLWR_FIELD_L2TPV3_SESSID)
+ lkups_cnt++;
+
+ /* is L4 (TCP/UDP/any other L4 protocol fields) specified? */
+ if (flags & (ICE_TC_FLWR_FIELD_DEST_L4_PORT |
+ ICE_TC_FLWR_FIELD_SRC_L4_PORT))
+ lkups_cnt++;
+
+ return lkups_cnt;
+}
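+
+/* Editorial note (not upstream text): the count returned above must equal
+ * the number of elements ice_tc_fill_rules() later writes for the same
+ * flags; callers size the lookup array from it and treat a mismatch as a
+ * bug, e.g.:
+ *
+ *	lkups_cnt = ice_tc_count_lkups(flags, headers, fltr);
+ *	list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+ *	i = ice_tc_fill_rules(hw, flags, fltr, list, &rule_info, NULL);
+ *	if (i != lkups_cnt)
+ *		return -EINVAL;
+ */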
+
+static enum ice_protocol_type ice_proto_type_from_mac(bool inner)
+{
+ return inner ? ICE_MAC_IL : ICE_MAC_OFOS;
+}
+
+static enum ice_protocol_type ice_proto_type_from_etype(bool inner)
+{
+ return inner ? ICE_ETYPE_IL : ICE_ETYPE_OL;
+}
+
+static enum ice_protocol_type ice_proto_type_from_ipv4(bool inner)
+{
+ return inner ? ICE_IPV4_IL : ICE_IPV4_OFOS;
+}
+
+static enum ice_protocol_type ice_proto_type_from_ipv6(bool inner)
+{
+ return inner ? ICE_IPV6_IL : ICE_IPV6_OFOS;
+}
+
+static enum ice_protocol_type ice_proto_type_from_l4_port(u16 ip_proto)
+{
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ return ICE_TCP_IL;
+ case IPPROTO_UDP:
+ return ICE_UDP_ILOS;
+ }
+
+ return 0;
+}
+
+static enum ice_protocol_type
+ice_proto_type_from_tunnel(enum ice_tunnel_type type)
+{
+ switch (type) {
+ case TNL_VXLAN:
+ return ICE_VXLAN;
+ case TNL_GENEVE:
+ return ICE_GENEVE;
+ case TNL_GRETAP:
+ return ICE_NVGRE;
+ case TNL_GTPU:
+ /* NO_PAY profiles will not work with GTP-U */
+ return ICE_GTP;
+ case TNL_GTPC:
+ return ICE_GTP_NO_PAY;
+ default:
+ return 0;
+ }
+}
+
+static enum ice_sw_tunnel_type
+ice_sw_type_from_tunnel(enum ice_tunnel_type type)
+{
+ switch (type) {
+ case TNL_VXLAN:
+ return ICE_SW_TUN_VXLAN;
+ case TNL_GENEVE:
+ return ICE_SW_TUN_GENEVE;
+ case TNL_GRETAP:
+ return ICE_SW_TUN_NVGRE;
+ case TNL_GTPU:
+ return ICE_SW_TUN_GTPU;
+ case TNL_GTPC:
+ return ICE_SW_TUN_GTPC;
+ default:
+ return ICE_NON_TUN;
+ }
+}
+
+static u16 ice_check_supported_vlan_tpid(u16 vlan_tpid)
+{
+ switch (vlan_tpid) {
+ case ETH_P_8021Q:
+ case ETH_P_8021AD:
+ case ETH_P_QINQ1:
+ return vlan_tpid;
+ default:
+ return 0;
+ }
+}
+
+static int
+ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr,
+ struct ice_adv_lkup_elem *list)
+{
+ struct ice_tc_flower_lyr_2_4_hdrs *hdr = &fltr->outer_headers;
+ int i = 0;
+
+ if (flags & ICE_TC_FLWR_FIELD_TENANT_ID) {
+ u32 tenant_id;
+
+ list[i].type = ice_proto_type_from_tunnel(fltr->tunnel_type);
+ switch (fltr->tunnel_type) {
+ case TNL_VXLAN:
+ case TNL_GENEVE:
+ tenant_id = be32_to_cpu(fltr->tenant_id) << 8;
+ list[i].h_u.tnl_hdr.vni = cpu_to_be32(tenant_id);
+ memcpy(&list[i].m_u.tnl_hdr.vni, "\xff\xff\xff\x00", 4);
+ i++;
+ break;
+ case TNL_GRETAP:
+ list[i].h_u.nvgre_hdr.tni_flow = fltr->tenant_id;
+ memcpy(&list[i].m_u.nvgre_hdr.tni_flow,
+ "\xff\xff\xff\xff", 4);
+ i++;
+ break;
+ case TNL_GTPC:
+ case TNL_GTPU:
+ list[i].h_u.gtp_hdr.teid = fltr->tenant_id;
+ memcpy(&list[i].m_u.gtp_hdr.teid,
+ "\xff\xff\xff\xff", 4);
+ i++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_DST_MAC) {
+ list[i].type = ice_proto_type_from_mac(false);
+ ether_addr_copy(list[i].h_u.eth_hdr.dst_addr,
+ hdr->l2_key.dst_mac);
+ ether_addr_copy(list[i].m_u.eth_hdr.dst_addr,
+ hdr->l2_mask.dst_mac);
+ i++;
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_OPTS &&
+ (fltr->tunnel_type == TNL_GTPU || fltr->tunnel_type == TNL_GTPC)) {
+ list[i].type = ice_proto_type_from_tunnel(fltr->tunnel_type);
+
+ if (fltr->gtp_pdu_info_masks.pdu_type) {
+ list[i].h_u.gtp_hdr.pdu_type =
+ fltr->gtp_pdu_info_keys.pdu_type << 4;
+ memcpy(&list[i].m_u.gtp_hdr.pdu_type, "\xf0", 1);
+ }
+
+ if (fltr->gtp_pdu_info_masks.qfi) {
+ list[i].h_u.gtp_hdr.qfi = fltr->gtp_pdu_info_keys.qfi;
+ memcpy(&list[i].m_u.gtp_hdr.qfi, "\x3f", 1);
+ }
+
+ i++;
+ }
+
+ if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 |
+ ICE_TC_FLWR_FIELD_ENC_DEST_IPV4)) {
+ list[i].type = ice_proto_type_from_ipv4(false);
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_IPV4) {
+ list[i].h_u.ipv4_hdr.src_addr = hdr->l3_key.src_ipv4;
+ list[i].m_u.ipv4_hdr.src_addr = hdr->l3_mask.src_ipv4;
+ }
+ if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_IPV4) {
+ list[i].h_u.ipv4_hdr.dst_addr = hdr->l3_key.dst_ipv4;
+ list[i].m_u.ipv4_hdr.dst_addr = hdr->l3_mask.dst_ipv4;
+ }
+ i++;
+ }
+
+ if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+ ICE_TC_FLWR_FIELD_ENC_DEST_IPV6)) {
+ list[i].type = ice_proto_type_from_ipv6(false);
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_IPV6) {
+ memcpy(&list[i].h_u.ipv6_hdr.src_addr,
+ &hdr->l3_key.src_ipv6_addr,
+ sizeof(hdr->l3_key.src_ipv6_addr));
+ memcpy(&list[i].m_u.ipv6_hdr.src_addr,
+ &hdr->l3_mask.src_ipv6_addr,
+ sizeof(hdr->l3_mask.src_ipv6_addr));
+ }
+ if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_IPV6) {
+ memcpy(&list[i].h_u.ipv6_hdr.dst_addr,
+ &hdr->l3_key.dst_ipv6_addr,
+ sizeof(hdr->l3_key.dst_ipv6_addr));
+ memcpy(&list[i].m_u.ipv6_hdr.dst_addr,
+ &hdr->l3_mask.dst_ipv6_addr,
+ sizeof(hdr->l3_mask.dst_ipv6_addr));
+ }
+ i++;
+ }
+
+ if (fltr->inner_headers.l2_key.n_proto == htons(ETH_P_IP) &&
+ (flags & (ICE_TC_FLWR_FIELD_ENC_IP_TOS |
+ ICE_TC_FLWR_FIELD_ENC_IP_TTL))) {
+ list[i].type = ice_proto_type_from_ipv4(false);
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TOS) {
+ list[i].h_u.ipv4_hdr.tos = hdr->l3_key.tos;
+ list[i].m_u.ipv4_hdr.tos = hdr->l3_mask.tos;
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TTL) {
+ list[i].h_u.ipv4_hdr.time_to_live = hdr->l3_key.ttl;
+ list[i].m_u.ipv4_hdr.time_to_live = hdr->l3_mask.ttl;
+ }
+
+ i++;
+ }
+
+ if (fltr->inner_headers.l2_key.n_proto == htons(ETH_P_IPV6) &&
+ (flags & (ICE_TC_FLWR_FIELD_ENC_IP_TOS |
+ ICE_TC_FLWR_FIELD_ENC_IP_TTL))) {
+ struct ice_ipv6_hdr *hdr_h, *hdr_m;
+
+ hdr_h = &list[i].h_u.ipv6_hdr;
+ hdr_m = &list[i].m_u.ipv6_hdr;
+ list[i].type = ice_proto_type_from_ipv6(false);
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TOS) {
+ be32p_replace_bits(&hdr_h->be_ver_tc_flow,
+ hdr->l3_key.tos,
+ ICE_IPV6_HDR_TC_MASK);
+ be32p_replace_bits(&hdr_m->be_ver_tc_flow,
+ hdr->l3_mask.tos,
+ ICE_IPV6_HDR_TC_MASK);
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TTL) {
+ hdr_h->hop_limit = hdr->l3_key.ttl;
+ hdr_m->hop_limit = hdr->l3_mask.ttl;
+ }
+
+ i++;
+ }
+
+ if ((flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) &&
+ hdr->l3_key.ip_proto == IPPROTO_UDP) {
+ list[i].type = ICE_UDP_OF;
+ list[i].h_u.l4_hdr.dst_port = hdr->l4_key.dst_port;
+ list[i].m_u.l4_hdr.dst_port = hdr->l4_mask.dst_port;
+ i++;
+ }
+
+ return i;
+}
+
+/**
+ * ice_tc_fill_rules - fill filter rules based on TC fltr
+ * @hw: pointer to HW structure
+ * @flags: tc flower field flags
+ * @tc_fltr: pointer to TC flower filter
+ * @list: list of advanced rule elements
+ * @rule_info: pointer to information about rule
+ * @l4_proto: pointer to information such as L4 proto type
+ *
+ * Fill the ice_adv_lkup_elem list based on the TC flower flags and
+ * headers. This list should be used to add an advanced filter in
+ * hardware.
+ */
+static int
+ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
+ struct ice_tc_flower_fltr *tc_fltr,
+ struct ice_adv_lkup_elem *list,
+ struct ice_adv_rule_info *rule_info,
+ u16 *l4_proto)
+{
+ struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers;
+ bool inner = false;
+ u16 vlan_tpid = 0;
+ int i = 0;
+
+ rule_info->vlan_type = vlan_tpid;
+
+ rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type);
+ if (tc_fltr->tunnel_type != TNL_LAST) {
+ i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list);
+
+ headers = &tc_fltr->inner_headers;
+ inner = true;
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) {
+ list[i].type = ice_proto_type_from_etype(inner);
+ list[i].h_u.ethertype.ethtype_id = headers->l2_key.n_proto;
+ list[i].m_u.ethertype.ethtype_id = headers->l2_mask.n_proto;
+ i++;
+ }
+
+ if (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+ ICE_TC_FLWR_FIELD_SRC_MAC)) {
+ struct ice_tc_l2_hdr *l2_key, *l2_mask;
+
+ l2_key = &headers->l2_key;
+ l2_mask = &headers->l2_mask;
+
+ list[i].type = ice_proto_type_from_mac(inner);
+ if (flags & ICE_TC_FLWR_FIELD_DST_MAC) {
+ ether_addr_copy(list[i].h_u.eth_hdr.dst_addr,
+ l2_key->dst_mac);
+ ether_addr_copy(list[i].m_u.eth_hdr.dst_addr,
+ l2_mask->dst_mac);
+ }
+ if (flags & ICE_TC_FLWR_FIELD_SRC_MAC) {
+ ether_addr_copy(list[i].h_u.eth_hdr.src_addr,
+ l2_key->src_mac);
+ ether_addr_copy(list[i].m_u.eth_hdr.src_addr,
+ l2_mask->src_mac);
+ }
+ i++;
+ }
+
+ /* copy VLAN info */
+ if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO)) {
+ vlan_tpid = be16_to_cpu(headers->vlan_hdr.vlan_tpid);
+ rule_info->vlan_type =
+ ice_check_supported_vlan_tpid(vlan_tpid);
+
+ if (flags & ICE_TC_FLWR_FIELD_CVLAN)
+ list[i].type = ICE_VLAN_EX;
+ else
+ list[i].type = ICE_VLAN_OFOS;
+
+ if (flags & ICE_TC_FLWR_FIELD_VLAN) {
+ list[i].h_u.vlan_hdr.vlan = headers->vlan_hdr.vlan_id;
+ list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0x0FFF);
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_VLAN_PRIO) {
+ if (flags & ICE_TC_FLWR_FIELD_VLAN) {
+ list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xEFFF);
+ } else {
+ list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xE000);
+ list[i].h_u.vlan_hdr.vlan = 0;
+ }
+ list[i].h_u.vlan_hdr.vlan |=
+ headers->vlan_hdr.vlan_prio;
+ }
+
+ i++;
+ }
+
+ if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO)) {
+ list[i].type = ICE_VLAN_IN;
+
+ if (flags & ICE_TC_FLWR_FIELD_CVLAN) {
+ list[i].h_u.vlan_hdr.vlan = headers->cvlan_hdr.vlan_id;
+ list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0x0FFF);
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_CVLAN_PRIO) {
+ if (flags & ICE_TC_FLWR_FIELD_CVLAN) {
+ list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xEFFF);
+ } else {
+ list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xE000);
+ list[i].h_u.vlan_hdr.vlan = 0;
+ }
+ list[i].h_u.vlan_hdr.vlan |=
+ headers->cvlan_hdr.vlan_prio;
+ }
+
+ i++;
+ }
+
+ if (flags & (ICE_TC_FLWR_FIELD_PPPOE_SESSID |
+ ICE_TC_FLWR_FIELD_PPP_PROTO)) {
+ struct ice_pppoe_hdr *vals, *masks;
+
+ vals = &list[i].h_u.pppoe_hdr;
+ masks = &list[i].m_u.pppoe_hdr;
+
+ list[i].type = ICE_PPPOE;
+
+ if (flags & ICE_TC_FLWR_FIELD_PPPOE_SESSID) {
+ vals->session_id = headers->pppoe_hdr.session_id;
+ masks->session_id = cpu_to_be16(0xFFFF);
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_PPP_PROTO) {
+ vals->ppp_prot_id = headers->pppoe_hdr.ppp_proto;
+ masks->ppp_prot_id = cpu_to_be16(0xFFFF);
+ }
+
+ i++;
+ }
+
+ /* copy L3 (IPv[4|6]: src, dest) address */
+ if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 |
+ ICE_TC_FLWR_FIELD_SRC_IPV4)) {
+ struct ice_tc_l3_hdr *l3_key, *l3_mask;
+
+ list[i].type = ice_proto_type_from_ipv4(inner);
+ l3_key = &headers->l3_key;
+ l3_mask = &headers->l3_mask;
+ if (flags & ICE_TC_FLWR_FIELD_DEST_IPV4) {
+ list[i].h_u.ipv4_hdr.dst_addr = l3_key->dst_ipv4;
+ list[i].m_u.ipv4_hdr.dst_addr = l3_mask->dst_ipv4;
+ }
+ if (flags & ICE_TC_FLWR_FIELD_SRC_IPV4) {
+ list[i].h_u.ipv4_hdr.src_addr = l3_key->src_ipv4;
+ list[i].m_u.ipv4_hdr.src_addr = l3_mask->src_ipv4;
+ }
+ i++;
+ } else if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV6 |
+ ICE_TC_FLWR_FIELD_SRC_IPV6)) {
+ struct ice_ipv6_hdr *ipv6_hdr, *ipv6_mask;
+ struct ice_tc_l3_hdr *l3_key, *l3_mask;
+
+ list[i].type = ice_proto_type_from_ipv6(inner);
+ ipv6_hdr = &list[i].h_u.ipv6_hdr;
+ ipv6_mask = &list[i].m_u.ipv6_hdr;
+ l3_key = &headers->l3_key;
+ l3_mask = &headers->l3_mask;
+
+ if (flags & ICE_TC_FLWR_FIELD_DEST_IPV6) {
+ memcpy(&ipv6_hdr->dst_addr, &l3_key->dst_ipv6_addr,
+ sizeof(l3_key->dst_ipv6_addr));
+ memcpy(&ipv6_mask->dst_addr, &l3_mask->dst_ipv6_addr,
+ sizeof(l3_mask->dst_ipv6_addr));
+ }
+ if (flags & ICE_TC_FLWR_FIELD_SRC_IPV6) {
+ memcpy(&ipv6_hdr->src_addr, &l3_key->src_ipv6_addr,
+ sizeof(l3_key->src_ipv6_addr));
+ memcpy(&ipv6_mask->src_addr, &l3_mask->src_ipv6_addr,
+ sizeof(l3_mask->src_ipv6_addr));
+ }
+ i++;
+ }
+
+ if (headers->l2_key.n_proto == htons(ETH_P_IP) &&
+ (flags & (ICE_TC_FLWR_FIELD_IP_TOS | ICE_TC_FLWR_FIELD_IP_TTL))) {
+ list[i].type = ice_proto_type_from_ipv4(inner);
+
+ if (flags & ICE_TC_FLWR_FIELD_IP_TOS) {
+ list[i].h_u.ipv4_hdr.tos = headers->l3_key.tos;
+ list[i].m_u.ipv4_hdr.tos = headers->l3_mask.tos;
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_IP_TTL) {
+ list[i].h_u.ipv4_hdr.time_to_live =
+ headers->l3_key.ttl;
+ list[i].m_u.ipv4_hdr.time_to_live =
+ headers->l3_mask.ttl;
+ }
+
+ i++;
+ }
+
+ if (headers->l2_key.n_proto == htons(ETH_P_IPV6) &&
+ (flags & (ICE_TC_FLWR_FIELD_IP_TOS | ICE_TC_FLWR_FIELD_IP_TTL))) {
+ struct ice_ipv6_hdr *hdr_h, *hdr_m;
+
+ hdr_h = &list[i].h_u.ipv6_hdr;
+ hdr_m = &list[i].m_u.ipv6_hdr;
+ list[i].type = ice_proto_type_from_ipv6(inner);
+
+ if (flags & ICE_TC_FLWR_FIELD_IP_TOS) {
+ be32p_replace_bits(&hdr_h->be_ver_tc_flow,
+ headers->l3_key.tos,
+ ICE_IPV6_HDR_TC_MASK);
+ be32p_replace_bits(&hdr_m->be_ver_tc_flow,
+ headers->l3_mask.tos,
+ ICE_IPV6_HDR_TC_MASK);
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_IP_TTL) {
+ hdr_h->hop_limit = headers->l3_key.ttl;
+ hdr_m->hop_limit = headers->l3_mask.ttl;
+ }
+
+ i++;
+ }
+
+ if (flags & ICE_TC_FLWR_FIELD_L2TPV3_SESSID) {
+ list[i].type = ICE_L2TPV3;
+
+ list[i].h_u.l2tpv3_sess_hdr.session_id =
+ headers->l2tpv3_hdr.session_id;
+ list[i].m_u.l2tpv3_sess_hdr.session_id =
+ cpu_to_be32(0xFFFFFFFF);
+
+ i++;
+ }
+
+ /* copy L4 (src, dest) port */
+ if (flags & (ICE_TC_FLWR_FIELD_DEST_L4_PORT |
+ ICE_TC_FLWR_FIELD_SRC_L4_PORT)) {
+ struct ice_tc_l4_hdr *l4_key, *l4_mask;
+
+ list[i].type = ice_proto_type_from_l4_port(headers->l3_key.ip_proto);
+ l4_key = &headers->l4_key;
+ l4_mask = &headers->l4_mask;
+
+ if (flags & ICE_TC_FLWR_FIELD_DEST_L4_PORT) {
+ list[i].h_u.l4_hdr.dst_port = l4_key->dst_port;
+ list[i].m_u.l4_hdr.dst_port = l4_mask->dst_port;
+ }
+ if (flags & ICE_TC_FLWR_FIELD_SRC_L4_PORT) {
+ list[i].h_u.l4_hdr.src_port = l4_key->src_port;
+ list[i].m_u.l4_hdr.src_port = l4_mask->src_port;
+ }
+ i++;
+ }
+
+ return i;
+}
+
+/**
+ * ice_tc_tun_get_type - get the tunnel type
+ * @tunnel_dev: ptr to tunnel device
+ *
+ * This function detects the appropriate tunnel_type if the specified device
+ * is a tunnel device such as VXLAN/Geneve
+ */
+static int ice_tc_tun_get_type(struct net_device *tunnel_dev)
+{
+ if (netif_is_vxlan(tunnel_dev))
+ return TNL_VXLAN;
+ if (netif_is_geneve(tunnel_dev))
+ return TNL_GENEVE;
+ if (netif_is_gretap(tunnel_dev) ||
+ netif_is_ip6gretap(tunnel_dev))
+ return TNL_GRETAP;
+
+ /* Assume GTP-U by default in case of GTP netdev.
+ * GTP-C may be selected later, based on enc_dst_port.
+ */
+ if (netif_is_gtp(tunnel_dev))
+ return TNL_GTPU;
+ return TNL_LAST;
+}
+
+bool ice_is_tunnel_supported(struct net_device *dev)
+{
+ return ice_tc_tun_get_type(dev) != TNL_LAST;
+}
+
+static int
+ice_eswitch_tc_parse_action(struct ice_tc_flower_fltr *fltr,
+ struct flow_action_entry *act)
+{
+ struct ice_repr *repr;
+
+ switch (act->id) {
+ case FLOW_ACTION_DROP:
+ fltr->action.fltr_act = ICE_DROP_PACKET;
+ break;
+
+ case FLOW_ACTION_REDIRECT:
+ fltr->action.fltr_act = ICE_FWD_TO_VSI;
+
+ if (ice_is_port_repr_netdev(act->dev)) {
+ repr = ice_netdev_to_repr(act->dev);
+
+ fltr->dest_vsi = repr->src_vsi;
+ fltr->direction = ICE_ESWITCH_FLTR_INGRESS;
+ } else if (netif_is_ice(act->dev) ||
+ ice_is_tunnel_supported(act->dev)) {
+ fltr->direction = ICE_ESWITCH_FLTR_EGRESS;
+ } else {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported netdevice in switchdev mode");
+ return -EINVAL;
+ }
+
+ break;
+
+ default:
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action in switchdev mode");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
+{
+ struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers;
+ struct ice_adv_rule_info rule_info = { 0 };
+ struct ice_rule_query_data rule_added;
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_adv_lkup_elem *list;
+ u32 flags = fltr->flags;
+ int lkups_cnt;
+ int ret;
+ int i;
+
+ if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)");
+ return -EOPNOTSUPP;
+ }
+
+ lkups_cnt = ice_tc_count_lkups(flags, headers, fltr);
+ list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+ if (!list)
+ return -ENOMEM;
+
+ i = ice_tc_fill_rules(hw, flags, fltr, list, &rule_info, NULL);
+ if (i != lkups_cnt) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* egress traffic is always redirected to the uplink */
+ if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS)
+ fltr->dest_vsi = vsi->back->switchdev.uplink_vsi;
+
+ rule_info.sw_act.fltr_act = fltr->action.fltr_act;
+ if (fltr->action.fltr_act != ICE_DROP_PACKET)
+ rule_info.sw_act.vsi_handle = fltr->dest_vsi->idx;
+ /* For now, set the priority to the highest value; it also becomes
+ * the priority of the recipe that gets created as a result of the
+ * new extraction sequence based on the input set.
+ * Priority '7' is the maximum value for a switch recipe; a higher
+ * number means the rule is evaluated earlier.
+ */
+ rule_info.priority = 7;
+ rule_info.flags_info.act_valid = true;
+
+ if (fltr->direction == ICE_ESWITCH_FLTR_INGRESS) {
+ rule_info.sw_act.flag |= ICE_FLTR_RX;
+ rule_info.sw_act.src = hw->pf_id;
+ rule_info.rx = true;
+ rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE;
+ } else {
+ rule_info.sw_act.flag |= ICE_FLTR_TX;
+ rule_info.sw_act.src = vsi->idx;
+ rule_info.rx = false;
+ rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE;
+ }
+
+ /* specify the cookie as filter_rule_id */
+ rule_info.fltr_rule_id = fltr->cookie;
+
+ ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
+ if (ret == -EEXIST) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because it already exists");
+ ret = -EINVAL;
+ goto exit;
+ } else if (ret) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter due to error");
+ goto exit;
+ }
+
+ /* store the output params, which are needed later for removing
+ * advanced switch filter
+ */
+ fltr->rid = rule_added.rid;
+ fltr->rule_id = rule_added.rule_id;
+ fltr->dest_id = rule_added.vsi_handle;
+
+exit:
+ kfree(list);
+ return ret;
+}
+
+/**
+ * ice_add_tc_flower_adv_fltr - add appropriate filter rules
+ * @vsi: Pointer to VSI
+ * @tc_fltr: Pointer to TC flower filter structure
+ *
+ * Add filter rules based on the filter parameters, using advanced recipes
+ * supported by the OS package.
+ */
+static int
+ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi,
+ struct ice_tc_flower_fltr *tc_fltr)
+{
+ struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers;
+ struct ice_adv_rule_info rule_info = {0};
+ struct ice_rule_query_data rule_added;
+ struct ice_adv_lkup_elem *list;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 flags = tc_fltr->flags;
+ struct ice_vsi *ch_vsi;
+ struct device *dev;
+ u16 lkups_cnt = 0;
+ u16 l4_proto = 0;
+ int ret = 0;
+ u16 i = 0;
+
+ dev = ice_pf_to_dev(pf);
+ if (ice_is_safe_mode(pf)) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because driver is in safe mode");
+ return -EOPNOTSUPP;
+ }
+
+ if (!flags || (flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 |
+ ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 |
+ ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 |
+ ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+ ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT))) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unsupported encap field(s)");
+ return -EOPNOTSUPP;
+ }
+
+ /* get the channel (aka ADQ VSI) */
+ if (tc_fltr->dest_vsi)
+ ch_vsi = tc_fltr->dest_vsi;
+ else
+ ch_vsi = vsi->tc_map_vsi[tc_fltr->action.tc_class];
+
+ lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr);
+ list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+ if (!list)
+ return -ENOMEM;
+
+ i = ice_tc_fill_rules(hw, flags, tc_fltr, list, &rule_info, &l4_proto);
+ if (i != lkups_cnt) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ rule_info.sw_act.fltr_act = tc_fltr->action.fltr_act;
+ if (tc_fltr->action.tc_class >= ICE_CHNL_START_TC) {
+ if (!ch_vsi) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because specified destination doesn't exist");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI;
+ rule_info.sw_act.vsi_handle = ch_vsi->idx;
+ rule_info.priority = 7;
+ rule_info.sw_act.src = hw->pf_id;
+ rule_info.rx = true;
+ dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n",
+ tc_fltr->action.tc_class,
+ rule_info.sw_act.vsi_handle, lkups_cnt);
+ } else {
+ rule_info.sw_act.flag |= ICE_FLTR_TX;
+ rule_info.sw_act.src = vsi->idx;
+ rule_info.rx = false;
+ }
+
+ /* specify the cookie as filter_rule_id */
+ rule_info.fltr_rule_id = tc_fltr->cookie;
+
+ ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
+ if (ret == -EEXIST) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack,
+ "Unable to add filter because it already exists");
+ ret = -EINVAL;
+ goto exit;
+ } else if (ret) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack,
+ "Unable to add filter due to error");
+ goto exit;
+ }
+
+ /* store the output params, which are needed later for removing
+ * advanced switch filter
+ */
+ tc_fltr->rid = rule_added.rid;
+ tc_fltr->rule_id = rule_added.rule_id;
+ if (tc_fltr->action.tc_class > 0 && ch_vsi) {
+ /* For PF ADQ, the VSI type is set as ICE_VSI_CHNL; for a PF ADQ
+ * filter, dest_vsi is not yet set in tc_fltr, hence store the
+ * dest_vsi pointer in tc_fltr here
+ */
+ if (ch_vsi->type == ICE_VSI_CHNL)
+ tc_fltr->dest_vsi = ch_vsi;
+ /* keep track of advanced switch filter for
+ * destination VSI (channel VSI)
+ */
+ ch_vsi->num_chnl_fltr++;
+ /* in this case, dest_id is VSI handle (sw handle) */
+ tc_fltr->dest_id = rule_added.vsi_handle;
+
+ /* keeps track of channel filters for PF VSI */
+ if (vsi->type == ICE_VSI_PF &&
+ (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+ ICE_TC_FLWR_FIELD_ENC_DST_MAC)))
+ pf->num_dmac_chnl_fltrs++;
+ }
+ dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x) for TC %u, rid %u, rule_id %u, vsi_idx %u\n",
+ lkups_cnt, flags,
+ tc_fltr->action.tc_class, rule_added.rid,
+ rule_added.rule_id, rule_added.vsi_handle);
+exit:
+ kfree(list);
+ return ret;
+}
+
+/**
+ * ice_tc_set_pppoe - Parse PPPoE fields from TC flower filter
+ * @match: Pointer to flow match structure
+ * @fltr: Pointer to filter structure
+ * @headers: Pointer to outer header fields
+ *
+ * Returns: PPP protocol used in filter (ppp_ses or ppp_disc)
+ */
+static u16
+ice_tc_set_pppoe(struct flow_match_pppoe *match,
+ struct ice_tc_flower_fltr *fltr,
+ struct ice_tc_flower_lyr_2_4_hdrs *headers)
+{
+ if (match->mask->session_id) {
+ fltr->flags |= ICE_TC_FLWR_FIELD_PPPOE_SESSID;
+ headers->pppoe_hdr.session_id = match->key->session_id;
+ }
+
+ if (match->mask->ppp_proto) {
+ fltr->flags |= ICE_TC_FLWR_FIELD_PPP_PROTO;
+ headers->pppoe_hdr.ppp_proto = match->key->ppp_proto;
+ }
+
+ return be16_to_cpu(match->key->type);
+}
+
+/**
+ * ice_tc_set_ipv4 - Parse IPv4 addresses from TC flower filter
+ * @match: Pointer to flow match structure
+ * @fltr: Pointer to filter structure
+ * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel IPv4 address
+ */
+static int
+ice_tc_set_ipv4(struct flow_match_ipv4_addrs *match,
+ struct ice_tc_flower_fltr *fltr,
+ struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap)
+{
+ if (match->key->dst) {
+ if (is_encap)
+ fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_IPV4;
+ else
+ fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV4;
+ headers->l3_key.dst_ipv4 = match->key->dst;
+ headers->l3_mask.dst_ipv4 = match->mask->dst;
+ }
+ if (match->key->src) {
+ if (is_encap)
+ fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_IPV4;
+ else
+ fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV4;
+ headers->l3_key.src_ipv4 = match->key->src;
+ headers->l3_mask.src_ipv4 = match->mask->src;
+ }
+ return 0;
+}
+
+/**
+ * ice_tc_set_ipv6 - Parse IPv6 addresses from TC flower filter
+ * @match: Pointer to flow match structure
+ * @fltr: Pointer to filter structure
+ * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel IPv6 address
+ */
+static int
+ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match,
+ struct ice_tc_flower_fltr *fltr,
+ struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap)
+{
+ struct ice_tc_l3_hdr *l3_key, *l3_mask;
+
+	/* src and dest IPv6 addresses should not be the LOOPBACK address
+	 * (0:0:0:0:0:0:0:1), which can be represented as ::1
+ */
+ if (ipv6_addr_loopback(&match->key->dst) ||
+ ipv6_addr_loopback(&match->key->src)) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Bad IPv6, addr is LOOPBACK");
+ return -EINVAL;
+ }
+	/* if both src and dest IPv6 masks are wildcards ("any"), error */
+ if (ipv6_addr_any(&match->mask->dst) &&
+ ipv6_addr_any(&match->mask->src)) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Bad src/dest IPv6, addr is any");
+ return -EINVAL;
+ }
+ if (!ipv6_addr_any(&match->mask->dst)) {
+ if (is_encap)
+ fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_IPV6;
+ else
+ fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV6;
+ }
+ if (!ipv6_addr_any(&match->mask->src)) {
+ if (is_encap)
+ fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_IPV6;
+ else
+ fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV6;
+ }
+
+ l3_key = &headers->l3_key;
+ l3_mask = &headers->l3_mask;
+
+ if (fltr->flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+ ICE_TC_FLWR_FIELD_SRC_IPV6)) {
+ memcpy(&l3_key->src_ipv6_addr, &match->key->src.s6_addr,
+ sizeof(match->key->src.s6_addr));
+ memcpy(&l3_mask->src_ipv6_addr, &match->mask->src.s6_addr,
+ sizeof(match->mask->src.s6_addr));
+ }
+ if (fltr->flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 |
+ ICE_TC_FLWR_FIELD_DEST_IPV6)) {
+ memcpy(&l3_key->dst_ipv6_addr, &match->key->dst.s6_addr,
+ sizeof(match->key->dst.s6_addr));
+ memcpy(&l3_mask->dst_ipv6_addr, &match->mask->dst.s6_addr,
+ sizeof(match->mask->dst.s6_addr));
+ }
+
+ return 0;
+}
+
+/**
+ * ice_tc_set_tos_ttl - Parse IP ToS/TTL from TC flower filter
+ * @match: Pointer to flow match structure
+ * @fltr: Pointer to filter structure
+ * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel
+ */
+static void
+ice_tc_set_tos_ttl(struct flow_match_ip *match,
+ struct ice_tc_flower_fltr *fltr,
+ struct ice_tc_flower_lyr_2_4_hdrs *headers,
+ bool is_encap)
+{
+ if (match->mask->tos) {
+ if (is_encap)
+ fltr->flags |= ICE_TC_FLWR_FIELD_ENC_IP_TOS;
+ else
+ fltr->flags |= ICE_TC_FLWR_FIELD_IP_TOS;
+
+ headers->l3_key.tos = match->key->tos;
+ headers->l3_mask.tos = match->mask->tos;
+ }
+
+ if (match->mask->ttl) {
+ if (is_encap)
+ fltr->flags |= ICE_TC_FLWR_FIELD_ENC_IP_TTL;
+ else
+ fltr->flags |= ICE_TC_FLWR_FIELD_IP_TTL;
+
+ headers->l3_key.ttl = match->key->ttl;
+ headers->l3_mask.ttl = match->mask->ttl;
+ }
+}
+
+/**
+ * ice_tc_set_port - Parse ports from TC flower filter
+ * @match: Flow match structure
+ * @fltr: Pointer to filter structure
+ * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel port
+ */
+static int
+ice_tc_set_port(struct flow_match_ports match,
+ struct ice_tc_flower_fltr *fltr,
+ struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap)
+{
+ if (match.key->dst) {
+ if (is_encap)
+ fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT;
+ else
+ fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT;
+
+ headers->l4_key.dst_port = match.key->dst;
+ headers->l4_mask.dst_port = match.mask->dst;
+ }
+ if (match.key->src) {
+ if (is_encap)
+ fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT;
+ else
+ fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT;
+
+ headers->l4_key.src_port = match.key->src;
+ headers->l4_mask.src_port = match.mask->src;
+ }
+ return 0;
+}
+
+static struct net_device *
+ice_get_tunnel_device(struct net_device *dev, struct flow_rule *rule)
+{
+ struct flow_action_entry *act;
+ int i;
+
+ if (ice_is_tunnel_supported(dev))
+ return dev;
+
+ flow_action_for_each(i, act, &rule->action) {
+ if (act->id == FLOW_ACTION_REDIRECT &&
+ ice_is_tunnel_supported(act->dev))
+ return act->dev;
+ }
+
+ return NULL;
+}
+
+/**
+ * ice_parse_gtp_type - Sets GTP tunnel type to GTP-U or GTP-C
+ * @match: Flow match structure
+ * @fltr: Pointer to filter structure
+ *
+ * GTP-C/GTP-U is selected based on destination port number (enc_dst_port).
+ * Before calling this function, fltr->tunnel_type should be set to TNL_GTPU,
+ * therefore making GTP-U the default choice (when destination port number is
+ * not specified).
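+ *
+ * A hypothetical tc invocation (interface name assumed) that selects GTP-C
+ * by matching the well-known GTP-C destination port:
+ *
+ *   tc filter add dev eth0 protocol ip ingress flower \
+ *       enc_dst_port 2123 skip_sw hw_tc 1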
+ */
+static int
+ice_parse_gtp_type(struct flow_match_ports match,
+ struct ice_tc_flower_fltr *fltr)
+{
+ u16 dst_port;
+
+ if (match.key->dst) {
+ dst_port = be16_to_cpu(match.key->dst);
+
+ switch (dst_port) {
+ case 2152:
+ break;
+ case 2123:
+ fltr->tunnel_type = TNL_GTPC;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported GTP port number");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int
+ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule,
+ struct ice_tc_flower_fltr *fltr)
+{
+ struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers;
+ struct flow_match_control enc_control;
+
+ fltr->tunnel_type = ice_tc_tun_get_type(dev);
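+	/* all tunnel types handled here (VXLAN, GENEVE, GTP) are UDP
+	 * encapsulations, so the outer L4 protocol is implicitly UDP
+	 */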
+ headers->l3_key.ip_proto = IPPROTO_UDP;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_match_enc_keyid enc_keyid;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid ||
+ enc_keyid.mask->keyid != cpu_to_be32(ICE_TC_FLOWER_MASK_32))
+ return -EINVAL;
+
+ fltr->flags |= ICE_TC_FLWR_FIELD_TENANT_ID;
+ fltr->tenant_id = enc_keyid.key->keyid;
+ }
+
+ flow_rule_match_enc_control(rule, &enc_control);
+
+ if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_enc_ipv4_addrs(rule, &match);
+ if (ice_tc_set_ipv4(&match, fltr, headers, true))
+ return -EINVAL;
+ } else if (enc_control.key->addr_type ==
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_enc_ipv6_addrs(rule, &match);
+ if (ice_tc_set_ipv6(&match, fltr, headers, true))
+ return -EINVAL;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ struct flow_match_ip match;
+
+ flow_rule_match_enc_ip(rule, &match);
+ ice_tc_set_tos_ttl(&match, fltr, headers, true);
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) &&
+ fltr->tunnel_type != TNL_VXLAN && fltr->tunnel_type != TNL_GENEVE) {
+ struct flow_match_ports match;
+
+ flow_rule_match_enc_ports(rule, &match);
+
+ if (fltr->tunnel_type != TNL_GTPU) {
+ if (ice_tc_set_port(match, fltr, headers, true))
+ return -EINVAL;
+ } else {
+ if (ice_parse_gtp_type(match, fltr))
+ return -EINVAL;
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+ struct flow_match_enc_opts match;
+
+ flow_rule_match_enc_opts(rule, &match);
+
+ memcpy(&fltr->gtp_pdu_info_keys, &match.key->data[0],
+ sizeof(struct gtp_pdu_session_info));
+
+ memcpy(&fltr->gtp_pdu_info_masks, &match.mask->data[0],
+ sizeof(struct gtp_pdu_session_info));
+
+ fltr->flags |= ICE_TC_FLWR_FIELD_ENC_OPTS;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_parse_cls_flower - Parse TC flower filters provided by kernel
+ * @filter_dev: Pointer to device on which filter is being added
+ * @vsi: Pointer to the VSI
+ * @f: Pointer to struct flow_cls_offload
+ * @fltr: Pointer to filter structure
+ */
+static int
+ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
+ struct flow_cls_offload *f,
+ struct ice_tc_flower_fltr *fltr)
+{
+ struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
+ struct flow_dissector *dissector;
+ struct net_device *tunnel_dev;
+
+ dissector = rule->match.dissector;
+
+ if (dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN) |
+ BIT(FLOW_DISSECTOR_KEY_CVLAN) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+ BIT(FLOW_DISSECTOR_KEY_IP) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_PPPOE) |
+ BIT(FLOW_DISSECTOR_KEY_L2TPV3))) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported key used");
+ return -EOPNOTSUPP;
+ }
+
+ tunnel_dev = ice_get_tunnel_device(filter_dev, rule);
+ if (tunnel_dev) {
+ int err;
+
+ filter_dev = tunnel_dev;
+
+ err = ice_parse_tunnel_attr(filter_dev, rule, fltr);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Failed to parse TC flower tunnel attributes");
+ return err;
+ }
+
+		/* header pointers should point to the inner headers; the
+		 * outer headers were already set by ice_parse_tunnel_attr
+ */
+ headers = &fltr->inner_headers;
+ } else if (dissector->used_keys &
+ (BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel");
+ return -EOPNOTSUPP;
+ } else {
+ fltr->tunnel_type = TNL_LAST;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+
+ n_proto_key = ntohs(match.key->n_proto);
+ n_proto_mask = ntohs(match.mask->n_proto);
+
+ if (n_proto_key == ETH_P_ALL || n_proto_key == 0 ||
+ fltr->tunnel_type == TNL_GTPU ||
+ fltr->tunnel_type == TNL_GTPC) {
+ n_proto_key = 0;
+ n_proto_mask = 0;
+ } else {
+ fltr->flags |= ICE_TC_FLWR_FIELD_ETH_TYPE_ID;
+ }
+
+ headers->l2_key.n_proto = cpu_to_be16(n_proto_key);
+ headers->l2_mask.n_proto = cpu_to_be16(n_proto_mask);
+ headers->l3_key.ip_proto = match.key->ip_proto;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_match_eth_addrs match;
+
+ flow_rule_match_eth_addrs(rule, &match);
+
+ if (!is_zero_ether_addr(match.key->dst)) {
+ ether_addr_copy(headers->l2_key.dst_mac,
+ match.key->dst);
+ ether_addr_copy(headers->l2_mask.dst_mac,
+ match.mask->dst);
+ fltr->flags |= ICE_TC_FLWR_FIELD_DST_MAC;
+ }
+
+ if (!is_zero_ether_addr(match.key->src)) {
+ ether_addr_copy(headers->l2_key.src_mac,
+ match.key->src);
+ ether_addr_copy(headers->l2_mask.src_mac,
+ match.mask->src);
+ fltr->flags |= ICE_TC_FLWR_FIELD_SRC_MAC;
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
+ is_vlan_dev(filter_dev)) {
+ struct flow_dissector_key_vlan mask;
+ struct flow_dissector_key_vlan key;
+ struct flow_match_vlan match;
+
+ if (is_vlan_dev(filter_dev)) {
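+			/* filter was added on a VLAN netdev: synthesize an
+			 * exact VLAN ID match (priority wildcarded) instead
+			 * of using a dissector-provided VLAN key
+			 */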
+ match.key = &key;
+ match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
+ match.key->vlan_priority = 0;
+ match.mask = &mask;
+ memset(match.mask, 0xff, sizeof(*match.mask));
+ match.mask->vlan_priority = 0;
+ } else {
+ flow_rule_match_vlan(rule, &match);
+ }
+
+ if (match.mask->vlan_id) {
+ if (match.mask->vlan_id == VLAN_VID_MASK) {
+ fltr->flags |= ICE_TC_FLWR_FIELD_VLAN;
+ headers->vlan_hdr.vlan_id =
+ cpu_to_be16(match.key->vlan_id &
+ VLAN_VID_MASK);
+ } else {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Bad VLAN mask");
+ return -EINVAL;
+ }
+ }
+
+ if (match.mask->vlan_priority) {
+ fltr->flags |= ICE_TC_FLWR_FIELD_VLAN_PRIO;
+ headers->vlan_hdr.vlan_prio =
+ be16_encode_bits(match.key->vlan_priority,
+ VLAN_PRIO_MASK);
+ }
+
+ if (match.mask->vlan_tpid)
+ headers->vlan_hdr.vlan_tpid = match.key->vlan_tpid;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
+ struct flow_match_vlan match;
+
+ if (!ice_is_dvm_ena(&vsi->back->hw)) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Double VLAN mode is not enabled");
+ return -EINVAL;
+ }
+
+ flow_rule_match_cvlan(rule, &match);
+
+ if (match.mask->vlan_id) {
+ if (match.mask->vlan_id == VLAN_VID_MASK) {
+ fltr->flags |= ICE_TC_FLWR_FIELD_CVLAN;
+ headers->cvlan_hdr.vlan_id =
+ cpu_to_be16(match.key->vlan_id &
+ VLAN_VID_MASK);
+ } else {
+ NL_SET_ERR_MSG_MOD(fltr->extack,
+ "Bad CVLAN mask");
+ return -EINVAL;
+ }
+ }
+
+ if (match.mask->vlan_priority) {
+ fltr->flags |= ICE_TC_FLWR_FIELD_CVLAN_PRIO;
+ headers->cvlan_hdr.vlan_prio =
+ be16_encode_bits(match.key->vlan_priority,
+ VLAN_PRIO_MASK);
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PPPOE)) {
+ struct flow_match_pppoe match;
+
+ flow_rule_match_pppoe(rule, &match);
+ n_proto_key = ice_tc_set_pppoe(&match, fltr, headers);
+
+ /* If ethertype equals ETH_P_PPP_SES, n_proto might be
+ * overwritten by encapsulated protocol (ppp_proto field) or set
+ * to 0. To correct this, flow_match_pppoe provides the type
+ * field, which contains the actual ethertype (ETH_P_PPP_SES).
+ */
+ headers->l2_key.n_proto = cpu_to_be16(n_proto_key);
+ headers->l2_mask.n_proto = cpu_to_be16(0xFFFF);
+ fltr->flags |= ICE_TC_FLWR_FIELD_ETH_TYPE_ID;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_control(rule, &match);
+
+ addr_type = match.key->addr_type;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ if (ice_tc_set_ipv4(&match, fltr, headers, false))
+ return -EINVAL;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ if (ice_tc_set_ipv6(&match, fltr, headers, false))
+ return -EINVAL;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+ struct flow_match_ip match;
+
+ flow_rule_match_ip(rule, &match);
+ ice_tc_set_tos_ttl(&match, fltr, headers, false);
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_L2TPV3)) {
+ struct flow_match_l2tpv3 match;
+
+ flow_rule_match_l2tpv3(rule, &match);
+
+ fltr->flags |= ICE_TC_FLWR_FIELD_L2TPV3_SESSID;
+ headers->l2tpv3_hdr.session_id = match.key->session_id;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ if (ice_tc_set_port(match, fltr, headers, false))
+ return -EINVAL;
+ switch (headers->l3_key.ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Only UDP and TCP transport are supported");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+/**
+ * ice_add_switch_fltr - Add TC flower filters
+ * @vsi: Pointer to VSI
+ * @fltr: Pointer to struct ice_tc_flower_fltr
+ *
+ * Add filter in HW switch block
+ */
+static int
+ice_add_switch_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
+{
+ if (fltr->action.fltr_act == ICE_FWD_TO_QGRP)
+ return -EOPNOTSUPP;
+
+ if (ice_is_eswitch_mode_switchdev(vsi->back))
+ return ice_eswitch_add_tc_fltr(vsi, fltr);
+
+ return ice_add_tc_flower_adv_fltr(vsi, fltr);
+}
+
+/**
+ * ice_handle_tclass_action - Support directing to a traffic class
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to TC flower offload structure
+ * @fltr: Pointer to TC flower filter structure
+ *
+ * Support directing traffic to a traffic class
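+ *
+ * An illustrative tc invocation (interface name assumed) that steers TCP
+ * port 80 traffic to ADQ traffic class 1:
+ *
+ *   tc filter add dev eth0 protocol ip ingress flower ip_proto tcp \
+ *       dst_port 80 skip_sw hw_tc 1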
+ */
+static int
+ice_handle_tclass_action(struct ice_vsi *vsi,
+ struct flow_cls_offload *cls_flower,
+ struct ice_tc_flower_fltr *fltr)
+{
+ int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
+ struct ice_vsi *main_vsi;
+
+ if (tc < 0) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because specified destination is invalid");
+ return -EINVAL;
+ }
+ if (!tc) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of invalid destination");
+ return -EINVAL;
+ }
+
+ if (!(vsi->all_enatc & BIT(tc))) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of non-existence destination");
+ return -EINVAL;
+ }
+
+ /* Redirect to a TC class or Queue Group */
+ main_vsi = ice_get_main_vsi(vsi->back);
+ if (!main_vsi || !main_vsi->netdev) {
+ NL_SET_ERR_MSG_MOD(fltr->extack,
+ "Unable to add filter because of invalid netdevice");
+ return -EINVAL;
+ }
+
+ if ((fltr->flags & ICE_TC_FLWR_FIELD_TENANT_ID) &&
+ (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+ ICE_TC_FLWR_FIELD_SRC_MAC))) {
+ NL_SET_ERR_MSG_MOD(fltr->extack,
+ "Unable to add filter because filter using tunnel key and inner MAC is unsupported combination");
+ return -EOPNOTSUPP;
+ }
+
+	/* For ADQ, the filter must include a dest MAC address, otherwise
+	 * unwanted packets with an unrelated MAC address get delivered to ADQ
+	 * VSIs as long as the remaining filter criteria (such as dest IP
+	 * address and dest/src L4 port) are satisfied. The following code
+	 * handles three cases:
+	 * 1. For non-tunnel, if the user specified MAC addresses, use them
+	 *    (this code does nothing)
+	 * 2. For non-tunnel, if the user didn't specify a MAC address, add an
+	 *    implicit dest MAC of the lower netdev's active unicast MAC
+	 *    address
+	 * 3. For tunnel, the TC filter through the flower classifier currently
+	 *    has no provision for the user to specify an outer DMAC, hence the
+	 *    driver implicitly adds an outer dest MAC of the lower netdev's
+	 *    active unicast MAC address.
+	 */
+ if (fltr->tunnel_type != TNL_LAST &&
+ !(fltr->flags & ICE_TC_FLWR_FIELD_ENC_DST_MAC))
+ fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DST_MAC;
+
+ if (fltr->tunnel_type == TNL_LAST &&
+ !(fltr->flags & ICE_TC_FLWR_FIELD_DST_MAC))
+ fltr->flags |= ICE_TC_FLWR_FIELD_DST_MAC;
+
+ if (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+ ICE_TC_FLWR_FIELD_ENC_DST_MAC)) {
+ ether_addr_copy(fltr->outer_headers.l2_key.dst_mac,
+ vsi->netdev->dev_addr);
+ eth_broadcast_addr(fltr->outer_headers.l2_mask.dst_mac);
+ }
+
+	/* validate the specified dest MAC address; make sure it either belongs
+	 * to the lower netdev or to one of its MACVLANs. MACVLAN MAC addresses
+	 * are added as unicast MAC filters destined to the main VSI.
+ */
+ if (!ice_mac_fltr_exist(&main_vsi->back->hw,
+ fltr->outer_headers.l2_key.dst_mac,
+ main_vsi->idx)) {
+ NL_SET_ERR_MSG_MOD(fltr->extack,
+ "Unable to add filter because legacy MAC filter for specified destination doesn't exist");
+ return -EINVAL;
+ }
+
+ /* Make sure VLAN is already added to main VSI, before allowing ADQ to
+ * add a VLAN based filter such as MAC + VLAN + L4 port.
+ */
+ if (fltr->flags & ICE_TC_FLWR_FIELD_VLAN) {
+ u16 vlan_id = be16_to_cpu(fltr->outer_headers.vlan_hdr.vlan_id);
+
+ if (!ice_vlan_fltr_exist(&main_vsi->back->hw, vlan_id,
+ main_vsi->idx)) {
+ NL_SET_ERR_MSG_MOD(fltr->extack,
+ "Unable to add filter because legacy VLAN filter for specified destination doesn't exist");
+ return -EINVAL;
+ }
+ }
+ fltr->action.fltr_act = ICE_FWD_TO_VSI;
+ fltr->action.tc_class = tc;
+
+ return 0;
+}
+
+/**
+ * ice_parse_tc_flower_actions - Parse the actions for a TC filter
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to TC flower offload structure
+ * @fltr: Pointer to TC flower filter structure
+ *
+ * Parse the actions for a TC filter
+ */
+static int
+ice_parse_tc_flower_actions(struct ice_vsi *vsi,
+ struct flow_cls_offload *cls_flower,
+ struct ice_tc_flower_fltr *fltr)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls_flower);
+ struct flow_action *flow_action = &rule->action;
+ struct flow_action_entry *act;
+ int i;
+
+ if (cls_flower->classid)
+ return ice_handle_tclass_action(vsi, cls_flower, fltr);
+
+ if (!flow_action_has_entries(flow_action))
+ return -EINVAL;
+
+ flow_action_for_each(i, act, flow_action) {
+ if (ice_is_eswitch_mode_switchdev(vsi->back)) {
+ int err = ice_eswitch_tc_parse_action(fltr, act);
+
+ if (err)
+ return err;
+ continue;
+ }
+ /* Allow only one rule per filter */
+
+ /* Drop action */
+ if (act->id == FLOW_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action DROP");
+ return -EINVAL;
+ }
+ fltr->action.fltr_act = ICE_FWD_TO_VSI;
+ }
+ return 0;
+}
+
+/**
+ * ice_del_tc_fltr - deletes a filter from HW table
+ * @vsi: Pointer to VSI
+ * @fltr: Pointer to struct ice_tc_flower_fltr
+ *
+ * This function deletes a filter from HW table and manages book-keeping
+ */
+static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
+{
+ struct ice_rule_query_data rule_rem;
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+ rule_rem.rid = fltr->rid;
+ rule_rem.rule_id = fltr->rule_id;
+ rule_rem.vsi_handle = fltr->dest_id;
+ err = ice_rem_adv_rule_by_id(&pf->hw, &rule_rem);
+ if (err) {
+ if (err == -ENOENT) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Filter does not exist");
+ return -ENOENT;
+ }
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Failed to delete TC flower filter");
+ return -EIO;
+ }
+
+ /* update advanced switch filter count for destination
+ * VSI if filter destination was VSI
+ */
+ if (fltr->dest_vsi) {
+ if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
+ fltr->dest_vsi->num_chnl_fltr--;
+
+ /* keeps track of channel filters for PF VSI */
+ if (vsi->type == ICE_VSI_PF &&
+ (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+ ICE_TC_FLWR_FIELD_ENC_DST_MAC)))
+ pf->num_dmac_chnl_fltrs--;
+ }
+ }
+ return 0;
+}
+
+/**
+ * ice_add_tc_fltr - adds a TC flower filter
+ * @netdev: Pointer to netdev
+ * @vsi: Pointer to VSI
+ * @f: Pointer to flower offload structure
+ * @__fltr: Pointer to struct ice_tc_flower_fltr
+ *
+ * This function parses TC-flower input fields, parses action,
+ * and adds a filter.
+ */
+static int
+ice_add_tc_fltr(struct net_device *netdev, struct ice_vsi *vsi,
+ struct flow_cls_offload *f,
+ struct ice_tc_flower_fltr **__fltr)
+{
+ struct ice_tc_flower_fltr *fltr;
+ int err;
+
+ /* by default, set output to be INVALID */
+ *__fltr = NULL;
+
+ fltr = kzalloc(sizeof(*fltr), GFP_KERNEL);
+ if (!fltr)
+ return -ENOMEM;
+
+ fltr->cookie = f->cookie;
+ fltr->extack = f->common.extack;
+ fltr->src_vsi = vsi;
+ INIT_HLIST_NODE(&fltr->tc_flower_node);
+
+ err = ice_parse_cls_flower(netdev, vsi, f, fltr);
+ if (err < 0)
+ goto err;
+
+ err = ice_parse_tc_flower_actions(vsi, f, fltr);
+ if (err < 0)
+ goto err;
+
+ err = ice_add_switch_fltr(vsi, fltr);
+ if (err < 0)
+ goto err;
+
+ /* return the newly created filter */
+ *__fltr = fltr;
+
+ return 0;
+err:
+ kfree(fltr);
+ return err;
+}
+
+/**
+ * ice_find_tc_flower_fltr - Find the TC flower filter in the list
+ * @pf: Pointer to PF
+ * @cookie: filter specific cookie
+ */
+static struct ice_tc_flower_fltr *
+ice_find_tc_flower_fltr(struct ice_pf *pf, unsigned long cookie)
+{
+ struct ice_tc_flower_fltr *fltr;
+
+ hlist_for_each_entry(fltr, &pf->tc_flower_fltr_list, tc_flower_node)
+ if (cookie == fltr->cookie)
+ return fltr;
+
+ return NULL;
+}
+
+/**
+ * ice_add_cls_flower - add TC flower filters
+ * @netdev: Pointer to filter device
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to flower offload structure
+ */
+int
+ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
+ struct flow_cls_offload *cls_flower)
+{
+ struct netlink_ext_ack *extack = cls_flower->common.extack;
+ struct net_device *vsi_netdev = vsi->netdev;
+ struct ice_tc_flower_fltr *fltr;
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+ if (ice_is_reset_in_progress(pf->state))
+ return -EBUSY;
+ if (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+ return -EINVAL;
+
+ if (ice_is_port_repr_netdev(netdev))
+ vsi_netdev = netdev;
+
+ if (!(vsi_netdev->features & NETIF_F_HW_TC) &&
+ !test_bit(ICE_FLAG_CLS_FLOWER, pf->flags)) {
+		/* Based on TC indirect notifications from the kernel, all ice
+		 * devices get an instance of the rule from a higher-level
+		 * device. Avoid triggering an explicit error in this case.
+ */
+ if (netdev == vsi_netdev)
+ NL_SET_ERR_MSG_MOD(extack, "can't apply TC flower filters, turn ON hw-tc-offload and try again");
+ return -EINVAL;
+ }
+
+ /* avoid duplicate entries, if exists - return error */
+ fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie);
+ if (fltr) {
+ NL_SET_ERR_MSG_MOD(extack, "filter cookie already exists, ignoring");
+ return -EEXIST;
+ }
+
+ /* prep and add TC-flower filter in HW */
+ err = ice_add_tc_fltr(netdev, vsi, cls_flower, &fltr);
+ if (err)
+ return err;
+
+ /* add filter into an ordered list */
+ hlist_add_head(&fltr->tc_flower_node, &pf->tc_flower_fltr_list);
+ return 0;
+}
+
+/**
+ * ice_del_cls_flower - delete TC flower filters
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct flow_cls_offload
+ */
+int
+ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower)
+{
+ struct ice_tc_flower_fltr *fltr;
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+ /* find filter */
+ fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie);
+ if (!fltr) {
+ if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) &&
+ hlist_empty(&pf->tc_flower_fltr_list))
+ return 0;
+
+		NL_SET_ERR_MSG_MOD(cls_flower->common.extack, "failed to delete TC flower filter because it could not be found");
+ return -EINVAL;
+ }
+
+ fltr->extack = cls_flower->common.extack;
+ /* delete filter from HW */
+ err = ice_del_tc_fltr(vsi, fltr);
+ if (err)
+ return err;
+
+ /* delete filter from an ordered list */
+ hlist_del(&fltr->tc_flower_node);
+
+ /* free the filter node */
+ kfree(fltr);
+
+ return 0;
+}
+
+/**
+ * ice_replay_tc_fltrs - replay TC filters
+ * @pf: pointer to PF struct
+ */
+void ice_replay_tc_fltrs(struct ice_pf *pf)
+{
+ struct ice_tc_flower_fltr *fltr;
+ struct hlist_node *node;
+
+ hlist_for_each_entry_safe(fltr, node,
+ &pf->tc_flower_fltr_list,
+ tc_flower_node) {
+ fltr->extack = NULL;
+ ice_add_switch_fltr(fltr->src_vsi, fltr);
+ }
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h
new file mode 100644
index 000000000..92642faad
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_TC_LIB_H_
+#define _ICE_TC_LIB_H_
+
+#define ICE_TC_FLWR_FIELD_DST_MAC BIT(0)
+#define ICE_TC_FLWR_FIELD_SRC_MAC BIT(1)
+#define ICE_TC_FLWR_FIELD_VLAN BIT(2)
+#define ICE_TC_FLWR_FIELD_DEST_IPV4 BIT(3)
+#define ICE_TC_FLWR_FIELD_SRC_IPV4 BIT(4)
+#define ICE_TC_FLWR_FIELD_DEST_IPV6 BIT(5)
+#define ICE_TC_FLWR_FIELD_SRC_IPV6 BIT(6)
+#define ICE_TC_FLWR_FIELD_DEST_L4_PORT BIT(7)
+#define ICE_TC_FLWR_FIELD_SRC_L4_PORT BIT(8)
+#define ICE_TC_FLWR_FIELD_TENANT_ID BIT(9)
+#define ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 BIT(10)
+#define ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 BIT(11)
+#define ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 BIT(12)
+#define ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 BIT(13)
+#define ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT BIT(14)
+#define ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT BIT(15)
+#define ICE_TC_FLWR_FIELD_ENC_DST_MAC BIT(16)
+#define ICE_TC_FLWR_FIELD_ETH_TYPE_ID BIT(17)
+#define ICE_TC_FLWR_FIELD_ENC_OPTS BIT(18)
+#define ICE_TC_FLWR_FIELD_CVLAN BIT(19)
+#define ICE_TC_FLWR_FIELD_PPPOE_SESSID BIT(20)
+#define ICE_TC_FLWR_FIELD_PPP_PROTO BIT(21)
+#define ICE_TC_FLWR_FIELD_IP_TOS BIT(22)
+#define ICE_TC_FLWR_FIELD_IP_TTL BIT(23)
+#define ICE_TC_FLWR_FIELD_ENC_IP_TOS BIT(24)
+#define ICE_TC_FLWR_FIELD_ENC_IP_TTL BIT(25)
+#define ICE_TC_FLWR_FIELD_L2TPV3_SESSID BIT(26)
+#define ICE_TC_FLWR_FIELD_VLAN_PRIO BIT(27)
+#define ICE_TC_FLWR_FIELD_CVLAN_PRIO BIT(28)
+
+#define ICE_TC_FLOWER_MASK_32 0xFFFFFFFF
+
+#define ICE_IPV6_HDR_TC_MASK 0xFF00000
+
+struct ice_indr_block_priv {
+ struct net_device *netdev;
+ struct ice_netdev_priv *np;
+ struct list_head list;
+};
+
+struct ice_tc_flower_action {
+ u32 tc_class;
+ enum ice_sw_fwd_act_type fltr_act;
+};
+
+struct ice_tc_vlan_hdr {
+ __be16 vlan_id; /* Only last 12 bits valid */
+ __be16 vlan_prio; /* Only last 3 bits valid (valid values: 0..7) */
+ __be16 vlan_tpid;
+};
+
+struct ice_tc_pppoe_hdr {
+ __be16 session_id;
+ __be16 ppp_proto;
+};
+
+struct ice_tc_l2_hdr {
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
+ __be16 n_proto; /* Ethernet Protocol */
+};
+
+struct ice_tc_l3_hdr {
+ u8 ip_proto; /* IPPROTO value */
+ union {
+ struct {
+ struct in_addr dst_ip;
+ struct in_addr src_ip;
+ } v4;
+ struct {
+ struct in6_addr dst_ip6;
+ struct in6_addr src_ip6;
+ } v6;
+ } ip;
+#define dst_ipv6 ip.v6.dst_ip6.s6_addr32
+#define dst_ipv6_addr ip.v6.dst_ip6.s6_addr
+#define src_ipv6 ip.v6.src_ip6.s6_addr32
+#define src_ipv6_addr ip.v6.src_ip6.s6_addr
+#define dst_ipv4 ip.v4.dst_ip.s_addr
+#define src_ipv4 ip.v4.src_ip.s_addr
+
+ u8 tos;
+ u8 ttl;
+};
+
+struct ice_tc_l2tpv3_hdr {
+ __be32 session_id;
+};
+
+struct ice_tc_l4_hdr {
+ __be16 dst_port;
+ __be16 src_port;
+};
+
+struct ice_tc_flower_lyr_2_4_hdrs {
+ /* L2 layer fields with their mask */
+ struct ice_tc_l2_hdr l2_key;
+ struct ice_tc_l2_hdr l2_mask;
+ struct ice_tc_vlan_hdr vlan_hdr;
+ struct ice_tc_vlan_hdr cvlan_hdr;
+ struct ice_tc_pppoe_hdr pppoe_hdr;
+ struct ice_tc_l2tpv3_hdr l2tpv3_hdr;
+ /* L3 (IPv4[6]) layer fields with their mask */
+ struct ice_tc_l3_hdr l3_key;
+ struct ice_tc_l3_hdr l3_mask;
+
+ /* L4 layer fields with their mask */
+ struct ice_tc_l4_hdr l4_key;
+ struct ice_tc_l4_hdr l4_mask;
+};
+
+enum ice_eswitch_fltr_direction {
+ ICE_ESWITCH_FLTR_INGRESS,
+ ICE_ESWITCH_FLTR_EGRESS,
+};
+
+struct ice_tc_flower_fltr {
+ struct hlist_node tc_flower_node;
+
+ /* cookie becomes filter_rule_id if rule is added successfully */
+ unsigned long cookie;
+
+ /* add_adv_rule returns information like recipe ID, rule_id. Store
+ * those values since they are needed to remove advanced rule
+ */
+ u16 rid;
+ u16 rule_id;
+ /* this could be queue/vsi_idx (sw handle)/queue_group, depending upon
+ * destination type
+ */
+ u16 dest_id;
+ /* if dest_id is vsi_idx, then need to store destination VSI ptr */
+ struct ice_vsi *dest_vsi;
+ /* direction of fltr for eswitch use case */
+ enum ice_eswitch_fltr_direction direction;
+
+ /* Parsed TC flower configuration params */
+ struct ice_tc_flower_lyr_2_4_hdrs outer_headers;
+ struct ice_tc_flower_lyr_2_4_hdrs inner_headers;
+ struct ice_vsi *src_vsi;
+ __be32 tenant_id;
+ struct gtp_pdu_session_info gtp_pdu_info_keys;
+ struct gtp_pdu_session_info gtp_pdu_info_masks;
+ u32 flags;
+ u8 tunnel_type;
+ struct ice_tc_flower_action action;
+
+ /* cache ptr which is used wherever needed to communicate netlink
+ * messages
+ */
+ struct netlink_ext_ack *extack;
+};
+
+/**
+ * ice_is_chnl_fltr - is this a valid channel filter
+ * @f: Pointer to tc-flower filter
+ *
+ * Whether a given filter is a valid channel filter is determined by its
+ * "destination". If the destination is hw_tc (aka tc_class) and it is
+ * non-zero, then it is a valid channel (aka ADQ) filter
+ */
+static inline bool ice_is_chnl_fltr(struct ice_tc_flower_fltr *f)
+{
+ return !!f->action.tc_class;
+}
+
+/**
+ * ice_chnl_dmac_fltr_cnt - DMAC based CHNL filter count
+ * @pf: Pointer to PF
+ */
+static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf)
+{
+ return pf->num_dmac_chnl_fltrs;
+}
+
+int
+ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
+ struct flow_cls_offload *cls_flower);
+int
+ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower);
+void ice_replay_tc_fltrs(struct ice_pf *pf);
+bool ice_is_tunnel_supported(struct net_device *dev);
+
+#endif /* _ICE_TC_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h
new file mode 100644
index 000000000..ae98d5a8f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_trace.h
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021 Intel Corporation. */
+
+/* Modeled on trace-events-sample.h */
+
+/* The trace subsystem name for ice will be "ice".
+ *
+ * This file is named ice_trace.h.
+ *
+ * Since this include file's name is different from the trace
+ * subsystem name, we'll have to define TRACE_INCLUDE_FILE at the end
+ * of this file.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ice
+
+/* See trace-events-sample.h for a detailed description of why this
+ * guard clause is different from most normal include files.
+ */
+#if !defined(_ICE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _ICE_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+/* ice_trace() macro enables shared code to refer to trace points
+ * like:
+ *
+ * trace_ice_example(args...)
+ *
+ * ... as:
+ *
+ * ice_trace(example, args...)
+ *
+ * ... to resolve to the PF version of the tracepoint without
+ * ifdefs, and to allow tracepoints to be disabled entirely at build
+ * time.
+ *
+ * Trace point should always be referred to in the driver via this
+ * macro.
+ *
+ * Similarly, ice_trace_enabled(trace_name) wraps references to
+ * trace_ice_<trace_name>_enabled() functions.
+ * @trace_name: name of tracepoint
+ */
+#define _ICE_TRACE_NAME(trace_name) (trace_##ice##_##trace_name)
+#define ICE_TRACE_NAME(trace_name) _ICE_TRACE_NAME(trace_name)
+
+#define ice_trace(trace_name, args...) ICE_TRACE_NAME(trace_name)(args)
+
+#define ice_trace_enabled(trace_name) ICE_TRACE_NAME(trace_name##_enabled)()
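+
+/* A minimal usage sketch, referencing a tracepoint defined below:
+ *
+ *	ice_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
+ *
+ * expands to trace_ice_clean_tx_irq(tx_ring, tx_desc, tx_buf).
+ */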
+
+/* This is for events common to PF. Corresponding versions will be named
+ * trace_ice_*. The ice_trace() macro above will select the right trace point
+ * name for the driver.
+ */
+
+/* Begin tracepoints */
+
+/* Global tracepoints */
+
+/* Events related to DIM, q_vectors and ring containers */
+DECLARE_EVENT_CLASS(ice_rx_dim_template,
+ TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+ TP_ARGS(q_vector, dim),
+ TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector)
+ __field(struct dim *, dim)
+ __string(devname, q_vector->rx.rx_ring->netdev->name)),
+
+ TP_fast_assign(__entry->q_vector = q_vector;
+ __entry->dim = dim;
+ __assign_str(devname, q_vector->rx.rx_ring->netdev->name);),
+
+ TP_printk("netdev: %s Rx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d",
+ __get_str(devname),
+ __entry->q_vector->rx.rx_ring->q_index,
+ __entry->dim->state,
+ __entry->dim->profile_ix,
+ __entry->dim->tune_state,
+ __entry->dim->steps_right,
+ __entry->dim->steps_left,
+ __entry->dim->tired)
+);
+
+DEFINE_EVENT(ice_rx_dim_template, ice_rx_dim_work,
+ TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+ TP_ARGS(q_vector, dim)
+);
+
+DECLARE_EVENT_CLASS(ice_tx_dim_template,
+ TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+ TP_ARGS(q_vector, dim),
+ TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector)
+ __field(struct dim *, dim)
+ __string(devname, q_vector->tx.tx_ring->netdev->name)),
+
+ TP_fast_assign(__entry->q_vector = q_vector;
+ __entry->dim = dim;
+ __assign_str(devname, q_vector->tx.tx_ring->netdev->name);),
+
+ TP_printk("netdev: %s Tx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d",
+ __get_str(devname),
+ __entry->q_vector->tx.tx_ring->q_index,
+ __entry->dim->state,
+ __entry->dim->profile_ix,
+ __entry->dim->tune_state,
+ __entry->dim->steps_right,
+ __entry->dim->steps_left,
+ __entry->dim->tired)
+);
+
+DEFINE_EVENT(ice_tx_dim_template, ice_tx_dim_work,
+ TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+ TP_ARGS(q_vector, dim)
+);
+
+/* Events related to a vsi & ring */
+DECLARE_EVENT_CLASS(ice_tx_template,
+ TP_PROTO(struct ice_tx_ring *ring, struct ice_tx_desc *desc,
+ struct ice_tx_buf *buf),
+
+ TP_ARGS(ring, desc, buf),
+ TP_STRUCT__entry(__field(void *, ring)
+ __field(void *, desc)
+ __field(void *, buf)
+ __string(devname, ring->netdev->name)),
+
+ TP_fast_assign(__entry->ring = ring;
+ __entry->desc = desc;
+ __entry->buf = buf;
+ __assign_str(devname, ring->netdev->name);),
+
+ TP_printk("netdev: %s ring: %pK desc: %pK buf %pK", __get_str(devname),
+ __entry->ring, __entry->desc, __entry->buf)
+);
+
+#define DEFINE_TX_TEMPLATE_OP_EVENT(name) \
+DEFINE_EVENT(ice_tx_template, name, \
+ TP_PROTO(struct ice_tx_ring *ring, \
+ struct ice_tx_desc *desc, \
+ struct ice_tx_buf *buf), \
+ TP_ARGS(ring, desc, buf))
+
+DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq);
+DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap);
+DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap_eop);
+
+DECLARE_EVENT_CLASS(ice_rx_template,
+ TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc),
+
+ TP_ARGS(ring, desc),
+
+ TP_STRUCT__entry(__field(void *, ring)
+ __field(void *, desc)
+ __string(devname, ring->netdev->name)),
+
+ TP_fast_assign(__entry->ring = ring;
+ __entry->desc = desc;
+ __assign_str(devname, ring->netdev->name);),
+
+ TP_printk("netdev: %s ring: %pK desc: %pK", __get_str(devname),
+ __entry->ring, __entry->desc)
+);
+DEFINE_EVENT(ice_rx_template, ice_clean_rx_irq,
+ TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc),
+ TP_ARGS(ring, desc)
+);
+
+DECLARE_EVENT_CLASS(ice_rx_indicate_template,
+ TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc,
+ struct sk_buff *skb),
+
+ TP_ARGS(ring, desc, skb),
+
+ TP_STRUCT__entry(__field(void *, ring)
+ __field(void *, desc)
+ __field(void *, skb)
+ __string(devname, ring->netdev->name)),
+
+ TP_fast_assign(__entry->ring = ring;
+ __entry->desc = desc;
+ __entry->skb = skb;
+ __assign_str(devname, ring->netdev->name);),
+
+ TP_printk("netdev: %s ring: %pK desc: %pK skb %pK", __get_str(devname),
+ __entry->ring, __entry->desc, __entry->skb)
+);
+
+DEFINE_EVENT(ice_rx_indicate_template, ice_clean_rx_irq_indicate,
+ TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc,
+ struct sk_buff *skb),
+ TP_ARGS(ring, desc, skb)
+);
+
+DECLARE_EVENT_CLASS(ice_xmit_template,
+ TP_PROTO(struct ice_tx_ring *ring, struct sk_buff *skb),
+
+ TP_ARGS(ring, skb),
+
+ TP_STRUCT__entry(__field(void *, ring)
+ __field(void *, skb)
+ __string(devname, ring->netdev->name)),
+
+ TP_fast_assign(__entry->ring = ring;
+ __entry->skb = skb;
+ __assign_str(devname, ring->netdev->name);),
+
+ TP_printk("netdev: %s skb: %pK ring: %pK", __get_str(devname),
+ __entry->skb, __entry->ring)
+);
+
+#define DEFINE_XMIT_TEMPLATE_OP_EVENT(name) \
+DEFINE_EVENT(ice_xmit_template, name, \
+ TP_PROTO(struct ice_tx_ring *ring, struct sk_buff *skb), \
+ TP_ARGS(ring, skb))
+
+DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring);
+DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring_drop);
+
+DECLARE_EVENT_CLASS(ice_tx_tstamp_template,
+ TP_PROTO(struct sk_buff *skb, int idx),
+
+ TP_ARGS(skb, idx),
+
+ TP_STRUCT__entry(__field(void *, skb)
+ __field(int, idx)),
+
+ TP_fast_assign(__entry->skb = skb;
+ __entry->idx = idx;),
+
+ TP_printk("skb %pK idx %d",
+ __entry->skb, __entry->idx)
+);
+#define DEFINE_TX_TSTAMP_OP_EVENT(name) \
+DEFINE_EVENT(ice_tx_tstamp_template, name, \
+ TP_PROTO(struct sk_buff *skb, int idx), \
+ TP_ARGS(skb, idx))
+
+DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_request);
+DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_fw_req);
+DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_fw_done);
+DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_complete);
+
+/* End tracepoints */
+
+#endif /* _ICE_TRACE_H_ */
+/* This must be outside ifdef _ICE_TRACE_H */
+
+/* This trace include file is not located in the .../include/trace
+ * with the kernel tracepoint definitions, because we're a loadable
+ * module.
+ */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE ../../drivers/net/ethernet/intel/ice/ice_trace
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
new file mode 100644
index 000000000..dbe80e505
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -0,0 +1,2520 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* The driver transmit and receive code */
+
+#include <linux/mm.h>
+#include <linux/netdevice.h>
+#include <linux/prefetch.h>
+#include <linux/bpf_trace.h>
+#include <net/dsfield.h>
+#include <net/mpls.h>
+#include <net/xdp.h>
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+#include "ice.h"
+#include "ice_trace.h"
+#include "ice_dcb_lib.h"
+#include "ice_xsk.h"
+#include "ice_eswitch.h"
+
+#define ICE_RX_HDR_SIZE 256
+
+#define FDIR_DESC_RXDID 0x40
+#define ICE_FDIR_CLEAN_DELAY 10
+
+/**
+ * ice_prgm_fdir_fltr - Program a Flow Director filter
+ * @vsi: VSI to send dummy packet
+ * @fdir_desc: flow director descriptor
+ * @raw_packet: allocated buffer for flow director
+ */
+int
+ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
+ u8 *raw_packet)
+{
+ struct ice_tx_buf *tx_buf, *first;
+ struct ice_fltr_desc *f_desc;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_ring *tx_ring;
+ struct device *dev;
+ dma_addr_t dma;
+ u32 td_cmd;
+ u16 i;
+
+ /* VSI and Tx ring */
+ if (!vsi)
+ return -ENOENT;
+ tx_ring = vsi->tx_rings[0];
+ if (!tx_ring || !tx_ring->desc)
+ return -ENOENT;
+ dev = tx_ring->dev;
+
+ /* we are using two descriptors to add/del a filter and we can wait */
+ for (i = ICE_FDIR_CLEAN_DELAY; ICE_DESC_UNUSED(tx_ring) < 2; i--) {
+ if (!i)
+ return -EAGAIN;
+ msleep_interruptible(1);
+ }
+
+ dma = dma_map_single(dev, raw_packet, ICE_FDIR_MAX_RAW_PKT_SIZE,
+ DMA_TO_DEVICE);
+
+ if (dma_mapping_error(dev, dma))
+ return -EINVAL;
+
+ /* grab the next descriptor */
+ i = tx_ring->next_to_use;
+ first = &tx_ring->tx_buf[i];
+ f_desc = ICE_TX_FDIRDESC(tx_ring, i);
+ memcpy(f_desc, fdir_desc, sizeof(*f_desc));
+
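+	/* the filter descriptor above carries the Flow Director programming
+	 * request; the data descriptor set up next points at the dummy packet
+	 * that triggers it
+	 */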
+ i++;
+ i = (i < tx_ring->count) ? i : 0;
+ tx_desc = ICE_TX_DESC(tx_ring, i);
+ tx_buf = &tx_ring->tx_buf[i];
+
+ i++;
+ tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+ memset(tx_buf, 0, sizeof(*tx_buf));
+ dma_unmap_len_set(tx_buf, len, ICE_FDIR_MAX_RAW_PKT_SIZE);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ td_cmd = ICE_TXD_LAST_DESC_CMD | ICE_TX_DESC_CMD_DUMMY |
+ ICE_TX_DESC_CMD_RE;
+
+ tx_buf->tx_flags = ICE_TX_FLAGS_DUMMY_PKT;
+ tx_buf->raw_buf = raw_packet;
+
+ tx_desc->cmd_type_offset_bsz =
+ ice_build_ctob(td_cmd, 0, ICE_FDIR_MAX_RAW_PKT_SIZE, 0);
+
+ /* Force memory write to complete before letting h/w know
+ * there are new descriptors to fetch.
+ */
+ wmb();
+
+ /* mark the data descriptor to be watched */
+ first->next_to_watch = tx_desc;
+
+ writel(tx_ring->next_to_use, tx_ring->tail);
+
+ return 0;
+}
+
+/**
+ * ice_unmap_and_free_tx_buf - Release a Tx buffer
+ * @ring: the ring that owns the buffer
+ * @tx_buf: the buffer to free
+ */
+static void
+ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf)
+{
+ if (tx_buf->skb) {
+ if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
+ devm_kfree(ring->dev, tx_buf->raw_buf);
+ else if (ice_ring_is_xdp(ring))
+ page_frag_free(tx_buf->raw_buf);
+ else
+ dev_kfree_skb_any(tx_buf->skb);
+ if (dma_unmap_len(tx_buf, len))
+ dma_unmap_single(ring->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ } else if (dma_unmap_len(tx_buf, len)) {
+ dma_unmap_page(ring->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ }
+
+ tx_buf->next_to_watch = NULL;
+ tx_buf->skb = NULL;
+ dma_unmap_len_set(tx_buf, len, 0);
+ /* tx_buf must be completely set up in the transmit path */
+}
+
+static struct netdev_queue *txring_txq(const struct ice_tx_ring *ring)
+{
+ return netdev_get_tx_queue(ring->netdev, ring->q_index);
+}
+
+/**
+ * ice_clean_tx_ring - Free all Tx buffers from the ring
+ * @tx_ring: ring to be cleaned
+ */
+void ice_clean_tx_ring(struct ice_tx_ring *tx_ring)
+{
+ u32 size;
+ u16 i;
+
+ if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
+ ice_xsk_clean_xdp_ring(tx_ring);
+ goto tx_skip_free;
+ }
+
+ /* ring already cleared, nothing to do */
+ if (!tx_ring->tx_buf)
+ return;
+
+ /* Free all the Tx ring sk_buffs */
+ for (i = 0; i < tx_ring->count; i++)
+ ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
+
+tx_skip_free:
+ memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
+
+ size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
+ PAGE_SIZE);
+ /* Zero out the descriptor ring */
+ memset(tx_ring->desc, 0, size);
+
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+ tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1;
+ tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1;
+
+ if (!tx_ring->netdev)
+ return;
+
+ /* cleanup Tx queue statistics */
+ netdev_tx_reset_queue(txring_txq(tx_ring));
+}
+
+/**
+ * ice_free_tx_ring - Free Tx resources per queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ */
+void ice_free_tx_ring(struct ice_tx_ring *tx_ring)
+{
+ u32 size;
+
+ ice_clean_tx_ring(tx_ring);
+ devm_kfree(tx_ring->dev, tx_ring->tx_buf);
+ tx_ring->tx_buf = NULL;
+
+ if (tx_ring->desc) {
+ size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
+ PAGE_SIZE);
+ dmam_free_coherent(tx_ring->dev, size,
+ tx_ring->desc, tx_ring->dma);
+ tx_ring->desc = NULL;
+ }
+}
+
+/**
+ * ice_clean_tx_irq - Reclaim resources after transmit completes
+ * @tx_ring: Tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ *
+ * Returns true if there's any budget left (i.e. the clean is finished)
+ */
+static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget)
+{
+ unsigned int total_bytes = 0, total_pkts = 0;
+ unsigned int budget = ICE_DFLT_IRQ_WORK;
+ struct ice_vsi *vsi = tx_ring->vsi;
+ s16 i = tx_ring->next_to_clean;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+
+ /* get the bql data ready */
+ netdev_txq_bql_complete_prefetchw(txring_txq(tx_ring));
+
+ tx_buf = &tx_ring->tx_buf[i];
+ tx_desc = ICE_TX_DESC(tx_ring, i);
+ i -= tx_ring->count;
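+	/* "i" is tracked as a negative offset from the end of the ring so
+	 * that the wrap checks below reduce to unlikely(!i)
+	 */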
+
+ prefetch(&vsi->state);
+
+ do {
+ struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+ /* if next_to_watch is not set then there is no work pending */
+ if (!eop_desc)
+ break;
+
+ /* follow the guidelines of other drivers */
+ prefetchw(&tx_buf->skb->users);
+
+ smp_rmb(); /* prevent any other reads prior to eop_desc */
+
+ ice_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
+ /* if the descriptor isn't done, no work yet to do */
+ if (!(eop_desc->cmd_type_offset_bsz &
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+ break;
+
+ /* clear next_to_watch to prevent false hangs */
+ tx_buf->next_to_watch = NULL;
+
+ /* update the statistics for this packet */
+ total_bytes += tx_buf->bytecount;
+ total_pkts += tx_buf->gso_segs;
+
+ /* free the skb */
+ napi_consume_skb(tx_buf->skb, napi_budget);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+
+ /* clear tx_buf data */
+ tx_buf->skb = NULL;
+ dma_unmap_len_set(tx_buf, len, 0);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ ice_trace(clean_tx_irq_unmap, tx_ring, tx_desc, tx_buf);
+ tx_buf++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buf = tx_ring->tx_buf;
+ tx_desc = ICE_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buf, len)) {
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+ }
+ }
+ ice_trace(clean_tx_irq_unmap_eop, tx_ring, tx_desc, tx_buf);
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buf++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buf = tx_ring->tx_buf;
+ tx_desc = ICE_TX_DESC(tx_ring, 0);
+ }
+
+ prefetch(tx_desc);
+
+ /* update budget accounting */
+ budget--;
+ } while (likely(budget));
+
+ i += tx_ring->count;
+ tx_ring->next_to_clean = i;
+
+ ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
+ netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, total_bytes);
+
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
+ if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) &&
+ (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
+ /* Make sure that anybody stopping the queue after this
+ * sees the new next_to_clean.
+ */
+ smp_mb();
+ if (netif_tx_queue_stopped(txring_txq(tx_ring)) &&
+ !test_bit(ICE_VSI_DOWN, vsi->state)) {
+ netif_tx_wake_queue(txring_txq(tx_ring));
+ ++tx_ring->tx_stats.restart_q;
+ }
+ }
+
+ return !!budget;
+}
+
+/**
+ * ice_setup_tx_ring - Allocate the Tx descriptors
+ * @tx_ring: the Tx ring to set up
+ *
+ * Return 0 on success, negative on error
+ */
+int ice_setup_tx_ring(struct ice_tx_ring *tx_ring)
+{
+ struct device *dev = tx_ring->dev;
+ u32 size;
+
+ if (!dev)
+ return -ENOMEM;
+
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(tx_ring->tx_buf);
+ tx_ring->tx_buf =
+ devm_kcalloc(dev, sizeof(*tx_ring->tx_buf), tx_ring->count,
+ GFP_KERNEL);
+ if (!tx_ring->tx_buf)
+ return -ENOMEM;
+
+ /* round up to nearest page */
+ size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
+ PAGE_SIZE);
+ tx_ring->desc = dmam_alloc_coherent(dev, size, &tx_ring->dma,
+ GFP_KERNEL);
+ if (!tx_ring->desc) {
+ dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
+ size);
+ goto err;
+ }
+
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+ tx_ring->tx_stats.prev_pkt = -1;
+ return 0;
+
+err:
+ devm_kfree(dev, tx_ring->tx_buf);
+ tx_ring->tx_buf = NULL;
+ return -ENOMEM;
+}
+
+/**
+ * ice_clean_rx_ring - Free Rx buffers
+ * @rx_ring: ring to be cleaned
+ */
+void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
+{
+ struct device *dev = rx_ring->dev;
+ u32 size;
+ u16 i;
+
+ /* ring already cleared, nothing to do */
+ if (!rx_ring->rx_buf)
+ return;
+
+ if (rx_ring->skb) {
+ dev_kfree_skb(rx_ring->skb);
+ rx_ring->skb = NULL;
+ }
+
+ if (rx_ring->xsk_pool) {
+ ice_xsk_clean_rx_ring(rx_ring);
+ goto rx_skip_free;
+ }
+
+ /* Free all the Rx ring sk_buffs */
+ for (i = 0; i < rx_ring->count; i++) {
+ struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+ if (!rx_buf->page)
+ continue;
+
+ /* Invalidate cache lines that may have been written to by
+ * device so that we avoid corrupting memory.
+ */
+ dma_sync_single_range_for_cpu(dev, rx_buf->dma,
+ rx_buf->page_offset,
+ rx_ring->rx_buf_len,
+ DMA_FROM_DEVICE);
+
+ /* free resources associated with mapping */
+ dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+ __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
+
+ rx_buf->page = NULL;
+ rx_buf->page_offset = 0;
+ }
+
+rx_skip_free:
+ if (rx_ring->xsk_pool)
+ memset(rx_ring->xdp_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->xdp_buf)));
+ else
+ memset(rx_ring->rx_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->rx_buf)));
+
+ /* Zero out the descriptor ring */
+ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
+ PAGE_SIZE);
+ memset(rx_ring->desc, 0, size);
+
+ rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+}
+
+/**
+ * ice_free_rx_ring - Free Rx resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ */
+void ice_free_rx_ring(struct ice_rx_ring *rx_ring)
+{
+ u32 size;
+
+ ice_clean_rx_ring(rx_ring);
+ if (rx_ring->vsi->type == ICE_VSI_PF)
+ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ rx_ring->xdp_prog = NULL;
+ if (rx_ring->xsk_pool) {
+ kfree(rx_ring->xdp_buf);
+ rx_ring->xdp_buf = NULL;
+ } else {
+ kfree(rx_ring->rx_buf);
+ rx_ring->rx_buf = NULL;
+ }
+
+ if (rx_ring->desc) {
+ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
+ PAGE_SIZE);
+ dmam_free_coherent(rx_ring->dev, size,
+ rx_ring->desc, rx_ring->dma);
+ rx_ring->desc = NULL;
+ }
+}
+
+/**
+ * ice_setup_rx_ring - Allocate the Rx descriptors
+ * @rx_ring: the Rx ring to set up
+ *
+ * Return 0 on success, negative on error
+ */
+int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
+{
+ struct device *dev = rx_ring->dev;
+ u32 size;
+
+ if (!dev)
+ return -ENOMEM;
+
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(rx_ring->rx_buf);
+ rx_ring->rx_buf =
+ kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL);
+ if (!rx_ring->rx_buf)
+ return -ENOMEM;
+
+ /* round up to nearest page */
+ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
+ PAGE_SIZE);
+ rx_ring->desc = dmam_alloc_coherent(dev, size, &rx_ring->dma,
+ GFP_KERNEL);
+ if (!rx_ring->desc) {
+ dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
+ size);
+ goto err;
+ }
+
+ rx_ring->next_to_use = 0;
+ rx_ring->next_to_clean = 0;
+
+ if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+ WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
+
+ if (rx_ring->vsi->type == ICE_VSI_PF &&
+ !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+ rx_ring->q_index, rx_ring->q_vector->napi.napi_id))
+ goto err;
+ return 0;
+
+err:
+ kfree(rx_ring->rx_buf);
+ rx_ring->rx_buf = NULL;
+ return -ENOMEM;
+}
+
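+/**
+ * ice_rx_frame_truesize - compute SKB truesize for a received frame
+ * @rx_ring: Rx ring the buffer came from
+ * @size: size of the received data (unused when PAGE_SIZE < 8192)
+ *
+ * For 4K pages the half-page split scheme fixes truesize at half of the
+ * Rx page; for larger pages truesize is derived from the actual frame
+ * size plus headroom and shared info alignment.
+ */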
+static unsigned int
+ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused size)
+{
+ unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+ truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+ truesize = rx_ring->rx_offset ?
+ SKB_DATA_ALIGN(rx_ring->rx_offset + size) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+ SKB_DATA_ALIGN(size);
+#endif
+ return truesize;
+}
+
+/**
+ * ice_run_xdp - Executes an XDP program on initialized xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ * @xdp_ring: ring to be used for XDP_TX action
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
+{
+ int err;
+ u32 act;
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ return ICE_XDP_PASS;
+ case XDP_TX:
+ if (static_branch_unlikely(&ice_xdp_locking_key))
+ spin_lock(&xdp_ring->tx_lock);
+ err = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring);
+ if (static_branch_unlikely(&ice_xdp_locking_key))
+ spin_unlock(&xdp_ring->tx_lock);
+ if (err == ICE_XDP_CONSUMED)
+ goto out_failure;
+ return err;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ if (err)
+ goto out_failure;
+ return ICE_XDP_REDIR;
+ default:
+ bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+out_failure:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ fallthrough;
+ case XDP_DROP:
+ return ICE_XDP_CONSUMED;
+ }
+}
+
+/**
+ * ice_xdp_xmit - submit packets to XDP ring for transmission
+ * @dev: netdev
+ * @n: number of XDP frames to be transmitted
+ * @frames: XDP frames to be transmitted
+ * @flags: transmit flags
+ *
+ * Returns the number of frames successfully sent. Failed frames
+ * will be freed by the XDP core.
+ * For error cases, a negative errno code is returned and no frames
+ * are transmitted (the caller must handle freeing frames).
+ */
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ unsigned int queue_index = smp_processor_id();
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_tx_ring *xdp_ring;
+ int nxmit = 0, i;
+
+ if (test_bit(ICE_VSI_DOWN, vsi->state))
+ return -ENETDOWN;
+
+ if (!ice_is_xdp_ena_vsi(vsi))
+ return -ENXIO;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ if (static_branch_unlikely(&ice_xdp_locking_key)) {
+ queue_index %= vsi->num_xdp_txq;
+ xdp_ring = vsi->xdp_rings[queue_index];
+ spin_lock(&xdp_ring->tx_lock);
+ } else {
+ /* Generally, should not happen */
+ if (unlikely(queue_index >= vsi->num_xdp_txq))
+ return -ENXIO;
+ xdp_ring = vsi->xdp_rings[queue_index];
+ }
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
+
+ err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+ if (err != ICE_XDP_TX)
+ break;
+ nxmit++;
+ }
+
+ if (unlikely(flags & XDP_XMIT_FLUSH))
+ ice_xdp_ring_update_tail(xdp_ring);
+
+ if (static_branch_unlikely(&ice_xdp_locking_key))
+ spin_unlock(&xdp_ring->tx_lock);
+
+ return nxmit;
+}
+
+/**
+ * ice_alloc_mapped_page - recycle or make a new page
+ * @rx_ring: ring to use
+ * @bi: rx_buf struct to modify
+ *
+ * Returns true if the page was successfully allocated or
+ * reused.
+ */
+static bool
+ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi)
+{
+ struct page *page = bi->page;
+ dma_addr_t dma;
+
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page))
+ return true;
+
+ /* alloc new page for storage */
+ page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ /* map page for use */
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+
+ /* if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
+ */
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ __free_pages(page, ice_rx_pg_order(rx_ring));
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ bi->dma = dma;
+ bi->page = page;
+ bi->page_offset = rx_ring->rx_offset;
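+ /* take a large up-front page reference and track the driver's share
+ * in pagecnt_bias; reusing a buffer then only needs a cheap local
+ * bias update instead of an atomic page refcount operation per frame
+ */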
+ page_ref_add(page, USHRT_MAX - 1);
+ bi->pagecnt_bias = USHRT_MAX;
+
+ return true;
+}
+
+/**
+ * ice_alloc_rx_bufs - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ *
+ * Returns false if all allocations were successful, true if any fail. Returning
+ * true signals to the caller that we didn't replace cleaned_count buffers and
+ * there is more work to do.
+ *
+ * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx
+ * buffers. Then bump tail at most one time. Grouping like this lets us avoid
+ * multiple tail writes per call.
+ */
+bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, u16 cleaned_count)
+{
+ union ice_32b_rx_flex_desc *rx_desc;
+ u16 ntu = rx_ring->next_to_use;
+ struct ice_rx_buf *bi;
+
+ /* do nothing if no valid netdev defined */
+ if ((!rx_ring->netdev && rx_ring->vsi->type != ICE_VSI_CTRL) ||
+ !cleaned_count)
+ return false;
+
+ /* get the Rx descriptor and buffer based on next_to_use */
+ rx_desc = ICE_RX_DESC(rx_ring, ntu);
+ bi = &rx_ring->rx_buf[ntu];
+
+ do {
+ /* if we fail here, we have work remaining */
+ if (!ice_alloc_mapped_page(rx_ring, bi))
+ break;
+
+ /* sync the buffer for use by the device */
+ dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+ bi->page_offset,
+ rx_ring->rx_buf_len,
+ DMA_FROM_DEVICE);
+
+ /* Refresh the desc even if buffer_addrs didn't change
+ * because each write-back erases this info.
+ */
+ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+ rx_desc++;
+ bi++;
+ ntu++;
+ if (unlikely(ntu == rx_ring->count)) {
+ rx_desc = ICE_RX_DESC(rx_ring, 0);
+ bi = rx_ring->rx_buf;
+ ntu = 0;
+ }
+
+ /* clear the status bits for the next_to_use descriptor */
+ rx_desc->wb.status_error0 = 0;
+
+ cleaned_count--;
+ } while (cleaned_count);
+
+ if (rx_ring->next_to_use != ntu)
+ ice_release_rx_desc(rx_ring, ntu);
+
+ return !!cleaned_count;
+}
+
+/**
+ * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse
+ * @rx_buf: Rx buffer to adjust
+ * @size: Size of adjustment
+ *
+ * Update the offset within the page so that the Rx buf will be ready to be
+ * reused. For systems with PAGE_SIZE < 8192 this function will flip the page
+ * offset so that the second half of the page assigned to the Rx buffer is
+ * used; otherwise the offset is moved by "size" bytes.
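+ * (for example, with 2048-byte buffers in a 4K page, offset 0 ^ 2048 = 2048
+ * and 2048 ^ 2048 = 0, alternating between the two halves of the page)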
+ */
+static void
+ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+ /* flip page offset to other buffer */
+ rx_buf->page_offset ^= size;
+#else
+ /* move offset up to the next cache line */
+ rx_buf->page_offset += size;
+#endif
+}
+
+/**
+ * ice_can_reuse_rx_page - Determine if page can be reused for another Rx
+ * @rx_buf: buffer containing the page
+ * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call
+ *
+ * If page is reusable, we have a green light for calling ice_reuse_rx_page,
+ * which will assign the current buffer to the buffer that next_to_alloc is
+ * pointing to; otherwise, the DMA mapping needs to be destroyed and
+ * page freed
+ */
+static bool
+ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
+{
+ unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
+ struct page *page = rx_buf->page;
+
+ /* avoid re-using remote and pfmemalloc pages */
+ if (!dev_page_is_reusable(page))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1))
+ return false;
+#else
+#define ICE_LAST_OFFSET \
+ (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048)
+ if (rx_buf->page_offset > ICE_LAST_OFFSET)
+ return false;
+#endif /* PAGE_SIZE < 8192 */
+
+ /* If we have drained the page fragment pool we need to update
+ * the pagecnt_bias and page count so that we fully restock the
+ * number of references the driver holds.
+ */
+ if (unlikely(pagecnt_bias == 1)) {
+ page_ref_add(page, USHRT_MAX - 1);
+ rx_buf->pagecnt_bias = USHRT_MAX;
+ }
+
+ return true;
+}
+
+/**
+ * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: packet length from rx_desc
+ *
+ * This function will add the data contained in rx_buf->page to the skb.
+ * It will just attach the page as a frag to the skb.
+ * The function will then update the page offset.
+ */
+static void
+ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ struct sk_buff *skb, unsigned int size)
+{
+#if (PAGE_SIZE >= 8192)
+ unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset);
+#else
+ unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#endif
+
+ if (!size)
+ return;
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
+ rx_buf->page_offset, size, truesize);
+
+ /* page is being used so we must update the page offset */
+ ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+}
+
+/**
+ * ice_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: Rx descriptor ring to store buffers on
+ * @old_buf: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ */
+static void
+ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf)
+{
+ u16 nta = rx_ring->next_to_alloc;
+ struct ice_rx_buf *new_buf;
+
+ new_buf = &rx_ring->rx_buf[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* Transfer page from old buffer to new buffer.
+ * Move each member individually to avoid possible store
+ * forwarding stalls and unnecessary copy of skb.
+ */
+ new_buf->dma = old_buf->dma;
+ new_buf->page = old_buf->page;
+ new_buf->page_offset = old_buf->page_offset;
+ new_buf->pagecnt_bias = old_buf->pagecnt_bias;
+}
+
+/**
+ * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @size: size of buffer to add to skb
+ * @rx_buf_pgcnt: rx_buf page refcount
+ *
+ * This function will pull an Rx buffer from the ring and synchronize it
+ * for use by the CPU.
+ */
+static struct ice_rx_buf *
+ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
+ int *rx_buf_pgcnt)
+{
+ struct ice_rx_buf *rx_buf;
+
+ rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+ *rx_buf_pgcnt =
+#if (PAGE_SIZE < 8192)
+ page_count(rx_buf->page);
+#else
+ 0;
+#endif
+ prefetchw(rx_buf->page);
+
+ if (!size)
+ return rx_buf;
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
+ rx_buf->page_offset, size,
+ DMA_FROM_DEVICE);
+
+ /* We have pulled a buffer for use, so decrement pagecnt_bias */
+ rx_buf->pagecnt_bias--;
+
+ return rx_buf;
+}
+
+/**
+ * ice_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function builds an skb around an existing Rx buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead.
+ */
+static struct sk_buff *
+ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ struct xdp_buff *xdp)
+{
+ u8 metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+ SKB_DATA_ALIGN(xdp->data_end -
+ xdp->data_hard_start);
+#endif
+ struct sk_buff *skb;
+
+ /* Prefetch first cache line of first page. If xdp->data_meta
+ * is unused, this points exactly as xdp->data, otherwise we
+ * likely have a consumer accessing first few bytes of meta
+ * data, and then actual data.
+ */
+ net_prefetch(xdp->data_meta);
+ /* build an skb around the page buffer */
+ skb = napi_build_skb(xdp->data_hard_start, truesize);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* must record the Rx queue, otherwise OS features such as
+ * symmetric queues won't work
+ */
+ skb_record_rx_queue(skb, rx_ring->q_index);
+
+ /* update pointers within the skb to store the data */
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ __skb_put(skb, xdp->data_end - xdp->data);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
+ /* buffer is used by skb, update page_offset */
+ ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+
+ return skb;
+}
+
+/**
+ * ice_construct_skb - Allocate skb and populate it
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function allocates an skb. It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
+ */
+static struct sk_buff *
+ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ struct xdp_buff *xdp)
+{
+ unsigned int metasize = xdp->data - xdp->data_meta;
+ unsigned int size = xdp->data_end - xdp->data;
+ unsigned int headlen;
+ struct sk_buff *skb;
+
+ /* prefetch first cache line of first page */
+ net_prefetch(xdp->data_meta);
+
+ /* allocate an skb to store the frags */
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+ ICE_RX_HDR_SIZE + metasize,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_record_rx_queue(skb, rx_ring->q_index);
+ /* Determine available headroom for copy */
+ headlen = size;
+ if (headlen > ICE_RX_HDR_SIZE)
+ headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
+
+ /* align pull length to size of long to optimize memcpy performance */
+ memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
+ ALIGN(headlen + metasize, sizeof(long)));
+
+ if (metasize) {
+ skb_metadata_set(skb, metasize);
+ __skb_pull(skb, metasize);
+ }
+
+ /* if we exhaust the linear part then add what is left as a frag */
+ size -= headlen;
+ if (size) {
+#if (PAGE_SIZE >= 8192)
+ unsigned int truesize = SKB_DATA_ALIGN(size);
+#else
+ unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#endif
+ skb_add_rx_frag(skb, 0, rx_buf->page,
+ rx_buf->page_offset + headlen, size, truesize);
+ /* buffer is used by skb, update page_offset */
+ ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+ } else {
+ /* buffer is unused, reset bias back to rx_buf; data was copied
+ * onto skb's linear part so there's no need for adjusting
+ * page offset and we can reuse this buffer as-is
+ */
+ rx_buf->pagecnt_bias++;
+ }
+
+ return skb;
+}
+
+/**
+ * ice_put_rx_buf - Clean up used buffer and either recycle or free
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect()
+ *
+ * This function will update next_to_clean and then clean up the contents
+ * of the rx_buf. It will either recycle the buffer or unmap it and free
+ * the associated resources.
+ */
+static void
+ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ int rx_buf_pgcnt)
+{
+ u16 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
+ if (!rx_buf)
+ return;
+
+ if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) {
+ /* hand second half of page back to the ring */
+ ice_reuse_rx_page(rx_ring, rx_buf);
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
+ ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+ ICE_RX_DMA_ATTR);
+ __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
+ }
+
+ /* clear contents of buffer_info */
+ rx_buf->page = NULL;
+}
+
+/**
+ * ice_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * If the buffer is an EOP buffer, this function exits returning false,
+ * otherwise return true indicating that this is in fact a non-EOP buffer.
+ */
+static bool
+ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
+{
+ /* if we are the last buffer then there is nothing else to do */
+#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
+ if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF)))
+ return false;
+
+ rx_ring->rx_stats.non_eop_descs++;
+
+ return true;
+}
+
+/**
+ * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
+ */
+int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+{
+ unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0;
+ u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+ unsigned int offset = rx_ring->rx_offset;
+ struct ice_tx_ring *xdp_ring = NULL;
+ unsigned int xdp_res, xdp_xmit = 0;
+ struct sk_buff *skb = rx_ring->skb;
+ struct bpf_prog *xdp_prog = NULL;
+ struct xdp_buff xdp;
+ bool failure;
+
+ /* Frame size depends on rx_ring setup when PAGE_SIZE=4K */
+#if (PAGE_SIZE < 8192)
+ frame_sz = ice_rx_frame_truesize(rx_ring, 0);
+#endif
+ xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
+
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+ if (xdp_prog)
+ xdp_ring = rx_ring->xdp_ring;
+
+ /* start the loop to process Rx packets bounded by 'budget' */
+ while (likely(total_rx_pkts < (unsigned int)budget)) {
+ union ice_32b_rx_flex_desc *rx_desc;
+ struct ice_rx_buf *rx_buf;
+ unsigned char *hard_start;
+ unsigned int size;
+ u16 stat_err_bits;
+ int rx_buf_pgcnt;
+ u16 vlan_tag = 0;
+ u16 rx_ptype;
+
+ /* get the Rx desc from Rx ring based on 'next_to_clean' */
+ rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+ /* status_error_len will always be zero for unused descriptors
+ * because it's cleared in cleanup, and overlaps with hdr_addr
+ * which is always zero because packet split isn't used; if the
+ * hardware wrote DD then it will be non-zero
+ */
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+ if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we know the
+ * DD bit is set.
+ */
+ dma_rmb();
+
+ ice_trace(clean_rx_irq, rx_ring, rx_desc);
+ if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) {
+ struct ice_vsi *ctrl_vsi = rx_ring->vsi;
+
+ if (rx_desc->wb.rxdid == FDIR_DESC_RXDID &&
+ ctrl_vsi->vf)
+ ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc);
+ ice_put_rx_buf(rx_ring, NULL, 0);
+ cleaned_count++;
+ continue;
+ }
+
+ size = le16_to_cpu(rx_desc->wb.pkt_len) &
+ ICE_RX_FLX_DESC_PKT_LEN_M;
+
+ /* retrieve a buffer from the ring */
+ rx_buf = ice_get_rx_buf(rx_ring, size, &rx_buf_pgcnt);
+
+ if (!size) {
+ xdp.data = NULL;
+ xdp.data_end = NULL;
+ xdp.data_hard_start = NULL;
+ xdp.data_meta = NULL;
+ goto construct_skb;
+ }
+
+ hard_start = page_address(rx_buf->page) + rx_buf->page_offset -
+ offset;
+ xdp_prepare_buff(&xdp, hard_start, offset, size, true);
+#if (PAGE_SIZE > 4096)
+ /* At larger PAGE_SIZE, frame_sz depends on the frame length */
+ xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
+#endif
+
+ if (!xdp_prog)
+ goto construct_skb;
+
+ xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog, xdp_ring);
+ if (!xdp_res)
+ goto construct_skb;
+ if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+ xdp_xmit |= xdp_res;
+ ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz);
+ } else {
+ rx_buf->pagecnt_bias++;
+ }
+ total_rx_bytes += size;
+ total_rx_pkts++;
+
+ cleaned_count++;
+ ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
+ continue;
+construct_skb:
+ if (skb) {
+ ice_add_rx_frag(rx_ring, rx_buf, skb, size);
+ } else if (likely(xdp.data)) {
+ if (ice_ring_uses_build_skb(rx_ring))
+ skb = ice_build_skb(rx_ring, rx_buf, &xdp);
+ else
+ skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
+ }
+ /* exit if we failed to retrieve a buffer */
+ if (!skb) {
+ rx_ring->rx_stats.alloc_buf_failed++;
+ if (rx_buf)
+ rx_buf->pagecnt_bias++;
+ break;
+ }
+
+ ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
+ cleaned_count++;
+
+ /* keep gathering buffers if this is a non-EOP descriptor */
+ if (ice_is_non_eop(rx_ring, rx_desc))
+ continue;
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
+ if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
+ stat_err_bits))) {
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
+
+ /* pad the skb if needed, to make a valid ethernet frame */
+ if (eth_skb_pad(skb)) {
+ skb = NULL;
+ continue;
+ }
+
+ /* probably a little skewed due to removing CRC */
+ total_rx_bytes += skb->len;
+
+ /* populate checksum, VLAN, and protocol */
+ rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+ ICE_RX_FLEX_DESC_PTYPE_M;
+
+ ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+
+ ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb);
+ /* send completed skb up the stack */
+ ice_receive_skb(rx_ring, skb, vlan_tag);
+ skb = NULL;
+
+ /* update budget accounting */
+ total_rx_pkts++;
+ }
+
+ /* return up to cleaned_count buffers to hardware */
+ failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
+
+ if (xdp_prog)
+ ice_finalize_xdp_rx(xdp_ring, xdp_xmit);
+ rx_ring->skb = skb;
+
+ ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
+
+ /* guarantee a trip back through this routine if there was a failure */
+ return failure ? budget : (int)total_rx_pkts;
+}
+
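+/**
+ * __ice_update_sample - fill a dim_sample with a ring container's totals
+ * @q_vector: interrupt vector the ring container belongs to
+ * @rc: ring container whose rings are summed
+ * @sample: dim sample to populate
+ * @is_tx: true to sample the container's Tx rings, false for Rx rings
+ */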
+static void __ice_update_sample(struct ice_q_vector *q_vector,
+ struct ice_ring_container *rc,
+ struct dim_sample *sample,
+ bool is_tx)
+{
+ u64 packets = 0, bytes = 0;
+
+ if (is_tx) {
+ struct ice_tx_ring *tx_ring;
+
+ ice_for_each_tx_ring(tx_ring, *rc) {
+ packets += tx_ring->stats.pkts;
+ bytes += tx_ring->stats.bytes;
+ }
+ } else {
+ struct ice_rx_ring *rx_ring;
+
+ ice_for_each_rx_ring(rx_ring, *rc) {
+ packets += rx_ring->stats.pkts;
+ bytes += rx_ring->stats.bytes;
+ }
+ }
+
+ dim_update_sample(q_vector->total_events, packets, bytes, sample);
+ sample->comp_ctr = 0;
+
+ /* if dim settings get stale, like when not updated for 1
+ * second or longer, force it to start again. This addresses the
+ * frequent case of an idle queue being switched to by the
+ * scheduler. The 1,000 here means 1,000 milliseconds.
+ */
+ if (ktime_ms_delta(sample->time, rc->dim.start_sample.time) >= 1000)
+ rc->dim.state = DIM_START_MEASURE;
+}
+
+/**
+ * ice_net_dim - Update net DIM algorithm
+ * @q_vector: the vector associated with the interrupt
+ *
+ * Create a DIM sample and notify net_dim() so that it can possibly decide
+ * a new ITR value based on incoming packets, bytes, and interrupts.
+ *
+ * This function is a no-op if the ring is not configured to dynamic ITR.
+ */
+static void ice_net_dim(struct ice_q_vector *q_vector)
+{
+ struct ice_ring_container *tx = &q_vector->tx;
+ struct ice_ring_container *rx = &q_vector->rx;
+
+ if (ITR_IS_DYNAMIC(tx)) {
+ struct dim_sample dim_sample;
+
+ __ice_update_sample(q_vector, tx, &dim_sample, true);
+ net_dim(&tx->dim, dim_sample);
+ }
+
+ if (ITR_IS_DYNAMIC(rx)) {
+ struct dim_sample dim_sample;
+
+ __ice_update_sample(q_vector, rx, &dim_sample, false);
+ net_dim(&rx->dim, dim_sample);
+ }
+}
+
+/**
+ * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
+ * @itr_idx: interrupt throttling index
+ * @itr: interrupt throttling value in usecs
+ */
+static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
+{
+ /* The ITR value is reported in microseconds, and the register value is
+ * recorded in 2 microsecond units. For this reason we only need to
+ * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
+ * granularity as a shift instead of division. The mask makes sure the
+ * ITR value is never odd so we don't accidentally write into the field
+ * prior to the ITR field.
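+ *
+ * For example, an ITR value of 50 usecs is programmed into the interval
+ * field as 25 units of 2 usecs each.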
+ */
+ itr &= ICE_ITR_MASK;
+
+ return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
+ (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
+ (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
+}
+
+/**
+ * ice_enable_interrupt - re-enable MSI-X interrupt
+ * @q_vector: the vector associated with the interrupt to enable
+ *
+ * If the VSI is down, the interrupt will not be re-enabled. Also,
+ * when enabling the interrupt always reset the wb_on_itr to false
+ * and trigger a software interrupt to clean out internal state.
+ */
+static void ice_enable_interrupt(struct ice_q_vector *q_vector)
+{
+ struct ice_vsi *vsi = q_vector->vsi;
+ bool wb_en = q_vector->wb_on_itr;
+ u32 itr_val;
+
+ if (test_bit(ICE_DOWN, vsi->state))
+ return;
+
+ /* trigger an ITR delayed software interrupt when exiting busy poll, to
+ * make sure to catch any pending cleanups that might have been missed
+ * due to interrupt state transition. If busy poll or poll isn't
+ * enabled, then don't update ITR, and just enable the interrupt.
+ */
+ if (!wb_en) {
+ itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+ } else {
+ q_vector->wb_on_itr = false;
+
+ /* Do two things here with a single write. Set up the third ITR
+ * index to be used for software interrupt moderation, and then
+ * trigger a software interrupt rate-limited to 20K interrupts per
+ * second; this helps avoid high interrupt loads due to frequently
+ * entering and exiting polling.
+ */
+ itr_val = ice_buildreg_itr(ICE_IDX_ITR2, ICE_ITR_20K);
+ itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M |
+ ICE_IDX_ITR2 << GLINT_DYN_CTL_SW_ITR_INDX_S |
+ GLINT_DYN_CTL_SW_ITR_INDX_ENA_M;
+ }
+ wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
+}
+
+/**
+ * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector
+ * @q_vector: q_vector to set WB_ON_ITR on
+ *
+ * We need to tell hardware to write-back completed descriptors even when
+ * interrupts are disabled. Descriptors will be written back on cache line
+ * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR
+ * descriptors may not be written back if they don't fill a cache line until
+ * the next interrupt.
+ *
+ * This sets the write-back frequency to whatever was set previously for the
+ * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we
+ * aren't meddling with the INTENA_M bit.
+ */
+static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
+{
+ struct ice_vsi *vsi = q_vector->vsi;
+
+ /* already in wb_on_itr mode no need to change it */
+ if (q_vector->wb_on_itr)
+ return;
+
+ /* use previously set ITR values for all of the ITR indices by
+ * specifying ICE_ITR_NONE, which will vary in adaptive (AIM) mode and
+ * be static in non-adaptive mode (user configured)
+ */
+ wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+ ((ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) &
+ GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M |
+ GLINT_DYN_CTL_WB_ON_ITR_M);
+
+ q_vector->wb_on_itr = true;
+}
+
+/**
+ * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ *
+ * Returns the amount of work done
+ */
+int ice_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct ice_q_vector *q_vector =
+ container_of(napi, struct ice_q_vector, napi);
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+ bool clean_complete = true;
+ int budget_per_ring;
+ int work_done = 0;
+
+ /* Since the actual Tx work is minimal, we can give the Tx a larger
+ * budget and be more aggressive about cleaning up the Tx descriptors.
+ */
+ ice_for_each_tx_ring(tx_ring, q_vector->tx) {
+ bool wd;
+
+ if (tx_ring->xsk_pool)
+ wd = ice_xmit_zc(tx_ring);
+ else if (ice_ring_is_xdp(tx_ring))
+ wd = true;
+ else
+ wd = ice_clean_tx_irq(tx_ring, budget);
+
+ if (!wd)
+ clean_complete = false;
+ }
+
+ /* Handle case where we are called by netpoll with a budget of 0 */
+ if (unlikely(budget <= 0))
+ return budget;
+
+ /* normally we have 1 Rx ring per q_vector */
+ if (unlikely(q_vector->num_ring_rx > 1))
+ /* We attempt to distribute budget to each Rx queue fairly, but
+ * don't allow the budget to go below 1 because that would exit
+ * polling early.
+ */
+ budget_per_ring = max_t(int, budget / q_vector->num_ring_rx, 1);
+ else
+ /* Max of 1 Rx ring in this q_vector so give it the budget */
+ budget_per_ring = budget;
+
+ ice_for_each_rx_ring(rx_ring, q_vector->rx) {
+ int cleaned;
+
+ /* A dedicated path for zero-copy allows making a single
+ * comparison in the irq context instead of many inside the
+ * ice_clean_rx_irq function and makes the codebase cleaner.
+ */
+ cleaned = rx_ring->xsk_pool ?
+ ice_clean_rx_irq_zc(rx_ring, budget_per_ring) :
+ ice_clean_rx_irq(rx_ring, budget_per_ring);
+ work_done += cleaned;
+ /* if we clean as many as budgeted, we must not be done */
+ if (cleaned >= budget_per_ring)
+ clean_complete = false;
+ }
+
+ /* If work not completed, return budget and polling will return */
+ if (!clean_complete) {
+ /* Set the writeback on ITR so partial completions of
+ * cache-lines will still continue even if we're polling.
+ */
+ ice_set_wb_on_itr(q_vector);
+ return budget;
+ }
+
+ /* Exit the polling mode, but don't re-enable interrupts if stack might
+ * poll us due to busy-polling
+ */
+ if (napi_complete_done(napi, work_done)) {
+ ice_net_dim(q_vector);
+ ice_enable_interrupt(q_vector);
+ } else {
+ ice_set_wb_on_itr(q_vector);
+ }
+
+ return min_t(int, work_done, budget - 1);
+}
+
+/**
+ * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size: the size buffer we want to assure is available
+ *
+ * Returns -EBUSY if a stop is needed, else 0
+ */
+static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size)
+{
+ netif_tx_stop_queue(txring_txq(tx_ring));
+ /* Memory barrier before checking head and tail */
+ smp_mb();
+
+ /* Check again in a case another CPU has just made room available. */
+ if (likely(ICE_DESC_UNUSED(tx_ring) < size))
+ return -EBUSY;
+
+ /* A reprieve! - use start_queue because it doesn't call schedule */
+ netif_tx_start_queue(txring_txq(tx_ring));
+ ++tx_ring->tx_stats.restart_q;
+ return 0;
+}
+
+/**
+ * ice_maybe_stop_tx - 1st level check for Tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size: the size buffer we want to assure is available
+ *
+ * Returns 0 if stop is not needed
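+ *
+ * The common case (enough descriptors free) is handled inline; the slow
+ * path in __ice_maybe_stop_tx stops the queue and re-checks under a
+ * memory barrier before deciding whether to return -EBUSY.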
+ */
+static int ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size)
+{
+ if (likely(ICE_DESC_UNUSED(tx_ring) >= size))
+ return 0;
+
+ return __ice_maybe_stop_tx(tx_ring, size);
+}
+
+/**
+ * ice_tx_map - Build the Tx descriptor
+ * @tx_ring: ring to send buffer on
+ * @first: first buffer info buffer to use
+ * @off: pointer to struct that holds offload parameters
+ *
+ * This function loops over the skb data pointed to by *first
+ * and gets a physical address for each memory location and programs
+ * it and the length into the transmit descriptor.
+ */
+static void
+ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first,
+ struct ice_tx_offload_params *off)
+{
+ u64 td_offset, td_tag, td_cmd;
+ u16 i = tx_ring->next_to_use;
+ unsigned int data_len, size;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ struct sk_buff *skb;
+ skb_frag_t *frag;
+ dma_addr_t dma;
+ bool kick;
+
+ td_tag = off->td_l2tag1;
+ td_cmd = off->td_cmd;
+ td_offset = off->td_offset;
+ skb = first->skb;
+
+ data_len = skb->data_len;
+ size = skb_headlen(skb);
+
+ tx_desc = ICE_TX_DESC(tx_ring, i);
+
+ if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) {
+ td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1;
+ td_tag = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >>
+ ICE_TX_FLAGS_VLAN_S;
+ }
+
+ dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+ tx_buf = first;
+
+ for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+ unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
+
+ if (dma_mapping_error(tx_ring->dev, dma))
+ goto dma_error;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buf, len, size);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
+ /* grow max_data so the first chunk ends on a 4K read-request boundary */
+ max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+
+ /* account for data chunks larger than the hardware
+ * can handle
+ */
+ while (unlikely(size > ICE_MAX_DATA_PER_TXD)) {
+ tx_desc->cmd_type_offset_bsz =
+ ice_build_ctob(td_cmd, td_offset, max_data,
+ td_tag);
+
+ tx_desc++;
+ i++;
+
+ if (i == tx_ring->count) {
+ tx_desc = ICE_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+
+ dma += max_data;
+ size -= max_data;
+
+ max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ }
+
+ if (likely(!data_len))
+ break;
+
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(td_cmd, td_offset,
+ size, td_tag);
+
+ tx_desc++;
+ i++;
+
+ if (i == tx_ring->count) {
+ tx_desc = ICE_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+
+ size = skb_frag_size(frag);
+ data_len -= size;
+
+ dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+ DMA_TO_DEVICE);
+
+ tx_buf = &tx_ring->tx_buf[i];
+ }
+
+ /* record SW timestamp if HW timestamp is not available */
+ skb_tx_timestamp(first->skb);
+
+ i++;
+ if (i == tx_ring->count)
+ i = 0;
+
+ /* write last descriptor with RS and EOP bits */
+ td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
+ tx_desc->cmd_type_offset_bsz =
+ ice_build_ctob(td_cmd, td_offset, size, td_tag);
+
+ /* Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch.
+ *
+ * We also use this memory barrier to make certain all of the
+ * status bits have been updated before next_to_watch is written.
+ */
+ wmb();
+
+ /* set next_to_watch value indicating a packet is present */
+ first->next_to_watch = tx_desc;
+
+ tx_ring->next_to_use = i;
+
+ ice_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+ /* notify HW of packet */
+ kick = __netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount,
+ netdev_xmit_more());
+ if (kick)
+ /* notify HW of packet */
+ writel(i, tx_ring->tail);
+
+ return;
+
+dma_error:
+ /* clear DMA mappings for failed tx_buf map */
+ for (;;) {
+ tx_buf = &tx_ring->tx_buf[i];
+ ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
+ if (tx_buf == first)
+ break;
+ if (i == 0)
+ i = tx_ring->count;
+ i--;
+ }
+
+ tx_ring->next_to_use = i;
+}
+
+/**
+ * ice_tx_csum - Enable Tx checksum offloads
+ * @first: pointer to the first descriptor
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 1 if checksum offload was set up, 0 if it isn't needed, or a
+ * negative error code if offload can't happen.
+ */
+static
+int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+ u32 l4_len = 0, l3_len = 0, l2_len = 0;
+ struct sk_buff *skb = first->skb;
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+ union {
+ struct tcphdr *tcp;
+ unsigned char *hdr;
+ } l4;
+ __be16 frag_off, protocol;
+ unsigned char *exthdr;
+ u32 offset, cmd = 0;
+ u8 l4_proto = 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
+ protocol = vlan_get_protocol(skb);
+
+ if (eth_p_mpls(protocol)) {
+ ip.hdr = skb_inner_network_header(skb);
+ l4.hdr = skb_checksum_start(skb);
+ } else {
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+ }
+
+ /* compute outer L2 header size */
+ l2_len = ip.hdr - skb->data;
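+ /* the descriptor's MACLEN field counts 2-byte words, hence l2_len / 2 */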
+ offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S;
+
+ /* set the tx_flags to indicate the IP protocol type. this is
+ * required so that checksum header computation below is accurate.
+ */
+ if (ip.v4->version == 4)
+ first->tx_flags |= ICE_TX_FLAGS_IPV4;
+ else if (ip.v6->version == 6)
+ first->tx_flags |= ICE_TX_FLAGS_IPV6;
+
+ if (skb->encapsulation) {
+ bool gso_ena = false;
+ u32 tunnel = 0;
+
+ /* define outer network header type */
+ if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
+ tunnel |= (first->tx_flags & ICE_TX_FLAGS_TSO) ?
+ ICE_TX_CTX_EIPT_IPV4 :
+ ICE_TX_CTX_EIPT_IPV4_NO_CSUM;
+ l4_proto = ip.v4->protocol;
+ } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
+ int ret;
+
+ tunnel |= ICE_TX_CTX_EIPT_IPV6;
+ exthdr = ip.hdr + sizeof(*ip.v6);
+ l4_proto = ip.v6->nexthdr;
+ ret = ipv6_skip_exthdr(skb, exthdr - skb->data,
+ &l4_proto, &frag_off);
+ if (ret < 0)
+ return -1;
+ }
+
+ /* define outer transport */
+ switch (l4_proto) {
+ case IPPROTO_UDP:
+ tunnel |= ICE_TXD_CTX_UDP_TUNNELING;
+ first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+ break;
+ case IPPROTO_GRE:
+ tunnel |= ICE_TXD_CTX_GRE_TUNNELING;
+ first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+ break;
+ case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
+ first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+ l4.hdr = skb_inner_network_header(skb);
+ break;
+ default:
+ if (first->tx_flags & ICE_TX_FLAGS_TSO)
+ return -1;
+
+ skb_checksum_help(skb);
+ return 0;
+ }
+
+ /* compute outer L3 header size */
+ tunnel |= ((l4.hdr - ip.hdr) / 4) <<
+ ICE_TXD_CTX_QW0_EIPLEN_S;
+
+ /* switch IP header pointer from outer to inner header */
+ ip.hdr = skb_inner_network_header(skb);
+
+ /* compute tunnel header size */
+ tunnel |= ((ip.hdr - l4.hdr) / 2) <<
+ ICE_TXD_CTX_QW0_NATLEN_S;
+
+ gso_ena = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL;
+ /* indicate if we need to offload outer UDP header */
+ if ((first->tx_flags & ICE_TX_FLAGS_TSO) && !gso_ena &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+ tunnel |= ICE_TXD_CTX_QW0_L4T_CS_M;
+
+ /* record tunnel offload values */
+ off->cd_tunnel_params |= tunnel;
+
+ /* set DTYP=1 to indicate that it's a Tx context descriptor
+ * in IPsec tunnel mode with Tx offloads in Quad word 1
+ */
+ off->cd_qw1 |= (u64)ICE_TX_DESC_DTYPE_CTX;
+
+ /* switch L4 header pointer from outer to inner */
+ l4.hdr = skb_inner_transport_header(skb);
+ l4_proto = 0;
+
+ /* reset type as we transition from outer to inner headers */
+ first->tx_flags &= ~(ICE_TX_FLAGS_IPV4 | ICE_TX_FLAGS_IPV6);
+ if (ip.v4->version == 4)
+ first->tx_flags |= ICE_TX_FLAGS_IPV4;
+ if (ip.v6->version == 6)
+ first->tx_flags |= ICE_TX_FLAGS_IPV6;
+ }
+
+ /* Enable IP checksum offloads */
+ if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
+ l4_proto = ip.v4->protocol;
+ /* the stack computes the IP header already, the only time we
+ * need the hardware to recompute it is in the case of TSO.
+ */
+ if (first->tx_flags & ICE_TX_FLAGS_TSO)
+ cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ else
+ cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
+
+ } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
+ cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
+ exthdr = ip.hdr + sizeof(*ip.v6);
+ l4_proto = ip.v6->nexthdr;
+ if (l4.hdr != exthdr)
+ ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_proto,
+ &frag_off);
+ } else {
+ return -1;
+ }
+
+ /* compute inner L3 header size */
+ l3_len = l4.hdr - ip.hdr;
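+ /* the descriptor's IPLEN field counts 4-byte words, hence l3_len / 4 */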
+ offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S;
+
+ /* Enable L4 checksum offloads */
+ switch (l4_proto) {
+ case IPPROTO_TCP:
+ /* enable checksum offloads */
+ cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
+ l4_len = l4.tcp->doff;
+ offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+ break;
+ case IPPROTO_UDP:
+ /* enable UDP checksum offload */
+ cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
+ l4_len = (sizeof(struct udphdr) >> 2);
+ offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+ break;
+ case IPPROTO_SCTP:
+ /* enable SCTP checksum offload */
+ cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
+ l4_len = sizeof(struct sctphdr) >> 2;
+ offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+ break;
+
+ default:
+ if (first->tx_flags & ICE_TX_FLAGS_TSO)
+ return -1;
+ skb_checksum_help(skb);
+ return 0;
+ }
+
+ off->td_cmd |= cmd;
+ off->td_offset |= offset;
+ return 1;
+}
+
+/**
+ * ice_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW
+ * @tx_ring: ring to send buffer on
+ * @first: pointer to struct ice_tx_buf
+ *
+ * Checks the skb and sets up the corresponding generic transmit flags
+ * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
+ */
+static void
+ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first)
+{
+ struct sk_buff *skb = first->skb;
+
+ /* nothing left to do, software offloaded VLAN */
+ if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol))
+ return;
+
+ /* the VLAN ethertype/TPID is determined by the VSI configuration and
+ * netdev feature flags; the driver allows either 802.1Q or 802.1ad
+ * VLAN offloads exclusively, so we only care about the VLAN ID here
+ */
+ if (skb_vlan_tag_present(skb)) {
+ first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S;
+ if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2)
+ first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN;
+ else
+ first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+ }
+
+ ice_tx_prepare_vlan_flags_dcb(tx_ring, first);
+}
+
+/**
+ * ice_tso - computes mss and TSO length to prepare for TSO
+ * @first: pointer to struct ice_tx_buf
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 1 if TSO was set up, 0 if it isn't needed, or a negative error
+ * code if TSO can't happen.
+ */
+static
+int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+ struct sk_buff *skb = first->skb;
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+ union {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ unsigned char *hdr;
+ } l4;
+ u64 cd_mss, cd_tso_len;
+ __be16 protocol;
+ u32 paylen;
+ u8 l4_start;
+ int err;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
+ if (!skb_is_gso(skb))
+ return 0;
+
+ err = skb_cow_head(skb, 0);
+ if (err < 0)
+ return err;
+
+ /* cppcheck-suppress unreadVariable */
+ protocol = vlan_get_protocol(skb);
+
+ if (eth_p_mpls(protocol))
+ ip.hdr = skb_inner_network_header(skb);
+ else
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_checksum_start(skb);
+
+ /* initialize outer IP header fields */
+ if (ip.v4->version == 4) {
+ ip.v4->tot_len = 0;
+ ip.v4->check = 0;
+ } else {
+ ip.v6->payload_len = 0;
+ }
+
+ if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
+ SKB_GSO_IPXIP4 |
+ SKB_GSO_IPXIP6 |
+ SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM)) {
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
+ l4.udp->len = 0;
+
+ /* determine offset of outer transport header */
+ l4_start = (u8)(l4.hdr - skb->data);
+
+ /* remove payload length from outer checksum */
+ paylen = skb->len - l4_start;
+ csum_replace_by_diff(&l4.udp->check,
+ (__force __wsum)htonl(paylen));
+ }
+
+ /* reset pointers to inner headers */
+
+ /* cppcheck-suppress unreadVariable */
+ ip.hdr = skb_inner_network_header(skb);
+ l4.hdr = skb_inner_transport_header(skb);
+
+ /* initialize inner IP header fields */
+ if (ip.v4->version == 4) {
+ ip.v4->tot_len = 0;
+ ip.v4->check = 0;
+ } else {
+ ip.v6->payload_len = 0;
+ }
+ }
+
+ /* determine offset of transport header */
+ l4_start = (u8)(l4.hdr - skb->data);
+
+ /* remove payload length from checksum */
+ paylen = skb->len - l4_start;
+
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+ csum_replace_by_diff(&l4.udp->check,
+ (__force __wsum)htonl(paylen));
+ /* compute length of UDP segmentation header */
+ off->header_len = (u8)sizeof(struct udphdr) + l4_start;
+ } else {
+ csum_replace_by_diff(&l4.tcp->check,
+ (__force __wsum)htonl(paylen));
+ /* compute length of TCP segmentation header */
+ off->header_len = (u8)((l4.tcp->doff * 4) + l4_start);
+ }
+
+ /* update gso_segs and bytecount */
+ first->gso_segs = skb_shinfo(skb)->gso_segs;
+ first->bytecount += (first->gso_segs - 1) * off->header_len;
+
+ cd_tso_len = skb->len - off->header_len;
+ cd_mss = skb_shinfo(skb)->gso_size;
+
+ /* record cdesc_qw1 with TSO parameters */
+ off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+ (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
+ (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
+ (cd_mss << ICE_TXD_CTX_QW1_MSS_S));
+ first->tx_flags |= ICE_TX_FLAGS_TSO;
+ return 1;
+}
+
+/**
+ * ice_txd_use_count - estimate the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to
+ * assume that we can have no more than 12K of data per descriptor, even
+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.
+ * Thus, we need to divide by 12K. But division is slow! Instead,
+ * we decompose the operation into shifts and one relatively cheap
+ * multiply operation.
+ *
+ * To divide by 12K, we first divide by 4K, then divide by 3:
+ * To divide by 4K, shift right by 12 bits
+ * To divide by 3, multiply by 85, then divide by 256
+ * (Divide by 256 is done by shifting right by 8 bits)
+ * Finally, we add one to round up. Because 256 isn't an exact multiple of
+ * 3, we'll underestimate near each multiple of 12K. This is actually more
+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last
+ * segment. For our purposes this is accurate out to 1M which is orders of
+ * magnitude greater than our largest possible GSO size.
+ *
+ * This would then be implemented as:
+ * return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR;
+ *
+ * Since multiplication and division are commutative, we can reorder
+ * operations into:
+ * return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
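+ *
+ * For example, size = 1500 gives (1500 * 85) >> 20 = 0, plus one for the
+ * skb data pointer = 1 descriptor, while size = 65536 gives
+ * (65536 * 85) >> 20 = 5, plus one = 6 descriptors (65536 / 12288 = 5.33).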
+ */
+static unsigned int ice_txd_use_count(unsigned int size)
+{
+ return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
+}
+
+/**
+ * ice_xmit_desc_count - calculate number of Tx descriptors needed
+ * @skb: send buffer
+ *
+ * Returns number of data descriptors needed for this skb.
+ */
+static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
+{
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+ unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+ unsigned int count = 0, size = skb_headlen(skb);
+
+ for (;;) {
+ count += ice_txd_use_count(size);
+
+ if (!nr_frags--)
+ break;
+
+ size = skb_frag_size(frag++);
+ }
+
+ return count;
+}
+
+/**
+ * __ice_chk_linearize - Check if there are more than 8 buffers per packet
+ * @skb: send buffer
+ *
+ * Note: This HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
+ */
+static bool __ice_chk_linearize(struct sk_buff *skb)
+{
+ const skb_frag_t *frag, *stale;
+ int nr_frags, sum;
+
+ /* no need to check if number of frags is less than 7 */
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ if (nr_frags < (ICE_MAX_BUF_TXD - 1))
+ return false;
+
+ /* We need to walk through the list and validate that each group
+ * of 6 fragments totals at least gso_size.
+ */
+ nr_frags -= ICE_MAX_BUF_TXD - 2;
+ frag = &skb_shinfo(skb)->frags[0];
+
+ /* Initialize size to the negative value of gso_size minus 1. We
+ * use this as the worst case scenario in which the frag ahead
+ * of us only provides one byte which is why we are limited to 6
+ * descriptors for a single transmit as the header and previous
+ * fragment are already consuming 2 descriptors.
+ */
+ sum = 1 - skb_shinfo(skb)->gso_size;
+
+ /* Add size of frags 0 through 4 to create our initial sum */
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+
+ /* Walk through fragments adding latest fragment, testing it, and
+ * then removing stale fragments from the sum.
+ */
+ for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+ int stale_size = skb_frag_size(stale);
+
+ sum += skb_frag_size(frag++);
+
+ /* The stale fragment may present us with a smaller
+ * descriptor than the actual fragment size. To account
+ * for that we need to remove all the data on the front and
+ * figure out what the remainder would be in the last
+ * descriptor associated with the fragment.
+ */
+ if (stale_size > ICE_MAX_DATA_PER_TXD) {
+ int align_pad = -(skb_frag_off(stale)) &
+ (ICE_MAX_READ_REQ_SIZE - 1);
+
+ sum -= align_pad;
+ stale_size -= align_pad;
+
+ do {
+ sum -= ICE_MAX_DATA_PER_TXD_ALIGNED;
+ stale_size -= ICE_MAX_DATA_PER_TXD_ALIGNED;
+ } while (stale_size > ICE_MAX_DATA_PER_TXD);
+ }
+
+ /* if sum is negative we failed to make sufficient progress */
+ if (sum < 0)
+ return true;
+
+ if (!nr_frags--)
+ break;
+
+ sum -= stale_size;
+ }
+
+ return false;
+}
+
+/**
+ * ice_chk_linearize - Check if there are more than 8 fragments per packet
+ * @skb: send buffer
+ * @count: number of buffers used
+ *
+ * Note: Our HW can't scatter-gather more than 8 fragments to build
+ * a packet on the wire and so we need to figure out the cases where we
+ * need to linearize the skb.
+ */
+static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
+{
+ /* Both TSO and single send will work if count is less than 8 */
+ if (likely(count < ICE_MAX_BUF_TXD))
+ return false;
+
+ if (skb_is_gso(skb))
+ return __ice_chk_linearize(skb);
+
+ /* we can support up to 8 data buffers for a single send */
+ return count != ICE_MAX_BUF_TXD;
+}
+
+/**
+ * ice_tstamp - set up context descriptor for hardware timestamp
+ * @tx_ring: pointer to the Tx ring to send buffer on
+ * @skb: pointer to the SKB we're sending
+ * @first: Tx buffer
+ * @off: Tx offload parameters
+ */
+static void
+ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb,
+ struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+ s8 idx;
+
+ /* only timestamp the outbound packet if the user has requested it */
+ if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
+ return;
+
+ if (!tx_ring->ptp_tx)
+ return;
+
+ /* Tx timestamps cannot be sampled when doing TSO */
+ if (first->tx_flags & ICE_TX_FLAGS_TSO)
+ return;
+
+ /* Grab an open timestamp slot */
+ idx = ice_ptp_request_ts(tx_ring->tx_tstamps, skb);
+ if (idx < 0) {
+ tx_ring->vsi->back->ptp.tx_hwtstamp_skipped++;
+ return;
+ }
+
+ off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+ (ICE_TX_CTX_DESC_TSYN << ICE_TXD_CTX_QW1_CMD_S) |
+ ((u64)idx << ICE_TXD_CTX_QW1_TSO_LEN_S));
+ first->tx_flags |= ICE_TX_FLAGS_TSYN;
+}
+
+/**
+ * ice_xmit_frame_ring - Sends buffer on Tx ring
+ * @skb: send buffer
+ * @tx_ring: ring to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+static netdev_tx_t
+ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)
+{
+ struct ice_tx_offload_params offload = { 0 };
+ struct ice_vsi *vsi = tx_ring->vsi;
+ struct ice_tx_buf *first;
+ struct ethhdr *eth;
+ unsigned int count;
+ int tso, csum;
+
+ ice_trace(xmit_frame_ring, tx_ring, skb);
+
+ count = ice_xmit_desc_count(skb);
+ if (ice_chk_linearize(skb, count)) {
+ if (__skb_linearize(skb))
+ goto out_drop;
+ count = ice_txd_use_count(skb->len);
+ tx_ring->tx_stats.tx_linearize++;
+ }
+
+ /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD,
+ * + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD,
+ * + 4 desc gap to avoid the cache line where head is,
+ * + 1 desc for context descriptor,
+ * otherwise try next time
+ */
+ if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE +
+ ICE_DESCS_FOR_CTX_DESC)) {
+ tx_ring->tx_stats.tx_busy++;
+ return NETDEV_TX_BUSY;
+ }
+
+ /* prefetch for bql data which is infrequently used */
+ netdev_txq_bql_enqueue_prefetchw(txring_txq(tx_ring));
+
+ offload.tx_ring = tx_ring;
+
+ /* record the location of the first descriptor for this packet */
+ first = &tx_ring->tx_buf[tx_ring->next_to_use];
+ first->skb = skb;
+ first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+ first->gso_segs = 1;
+ first->tx_flags = 0;
+
+ /* prepare the VLAN tagging flags for Tx */
+ ice_tx_prepare_vlan_flags(tx_ring, first);
+ if (first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) {
+ offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+ (ICE_TX_CTX_DESC_IL2TAG2 <<
+ ICE_TXD_CTX_QW1_CMD_S));
+ offload.cd_l2tag2 = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >>
+ ICE_TX_FLAGS_VLAN_S;
+ }
+
+ /* set up TSO offload */
+ tso = ice_tso(first, &offload);
+ if (tso < 0)
+ goto out_drop;
+
+ /* always set up Tx checksum offload */
+ csum = ice_tx_csum(first, &offload);
+ if (csum < 0)
+ goto out_drop;
+
+ /* allow CONTROL frames egress from main VSI if FW LLDP disabled */
+ eth = (struct ethhdr *)skb_mac_header(skb);
+ if (unlikely((skb->priority == TC_PRIO_CONTROL ||
+ eth->h_proto == htons(ETH_P_LLDP)) &&
+ vsi->type == ICE_VSI_PF &&
+ vsi->port_info->qos_cfg.is_sw_lldp))
+ offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+ ICE_TX_CTX_DESC_SWTCH_UPLINK <<
+ ICE_TXD_CTX_QW1_CMD_S);
+
+ ice_tstamp(tx_ring, skb, first, &offload);
+ if (ice_is_switchdev_running(vsi->back))
+ ice_eswitch_set_target_vsi(skb, &offload);
+
+ if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
+ struct ice_tx_ctx_desc *cdesc;
+ u16 i = tx_ring->next_to_use;
+
+ /* grab the next descriptor */
+ cdesc = ICE_TX_CTX_DESC(tx_ring, i);
+ i++;
+ tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+ /* setup context descriptor */
+ cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params);
+ cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2);
+ cdesc->rsvd = cpu_to_le16(0);
+ cdesc->qw1 = cpu_to_le64(offload.cd_qw1);
+ }
+
+ ice_tx_map(tx_ring, first, &offload);
+ return NETDEV_TX_OK;
+
+out_drop:
+ ice_trace(xmit_frame_ring_drop, tx_ring, skb);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
+
+/**
+ * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_tx_ring *tx_ring;
+
+ tx_ring = vsi->tx_rings[skb->queue_mapping];
+
+ /* hardware can't handle really short frames; hardware padding works
+ * beyond this point
+ */
+ if (skb_put_padto(skb, ICE_MIN_TX_LEN))
+ return NETDEV_TX_OK;
+
+ return ice_xmit_frame_ring(skb, tx_ring);
+}
+
+/**
+ * ice_get_dscp_up - return the UP/TC value for a SKB
+ * @dcbcfg: DCB config that contains DSCP to UP/TC mapping
+ * @skb: SKB to query for info to determine UP/TC
+ *
+ * This function is to only be called when the PF is in L3 DSCP PFC mode
+ */
+static u8 ice_get_dscp_up(struct ice_dcbx_cfg *dcbcfg, struct sk_buff *skb)
+{
+ u8 dscp = 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+ return dcbcfg->dscp_map[dscp];
+}
+
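+/**
+ * ice_select_queue - select Tx queue, mapping DSCP to priority when needed
+ * @netdev: network interface device structure
+ * @skb: buffer to transmit
+ * @sb_dev: subordinate device, if any, used by the stack for queue selection
+ *
+ * When the PF is in DSCP PFC mode, derive the skb priority from the DSCP
+ * field before falling back to the default netdev queue selection.
+ */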
+u16
+ice_select_queue(struct net_device *netdev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *dcbcfg;
+
+ dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+ if (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP)
+ skb->priority = ice_get_dscp_up(dcbcfg, skb);
+
+ return netdev_pick_tx(netdev, skb, sb_dev);
+}
+
+/**
+ * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue
+ * @tx_ring: tx_ring to clean
+ */
+void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring)
+{
+ struct ice_vsi *vsi = tx_ring->vsi;
+ s16 i = tx_ring->next_to_clean;
+ int budget = ICE_DFLT_IRQ_WORK;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+
+ tx_buf = &tx_ring->tx_buf[i];
+ tx_desc = ICE_TX_DESC(tx_ring, i);
+ i -= tx_ring->count;
+
+ do {
+ struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+ /* if next_to_watch is not set then there is no pending work */
+ if (!eop_desc)
+ break;
+
+ /* prevent any other reads prior to eop_desc */
+ smp_rmb();
+
+ /* if the descriptor isn't done, no work to do */
+ if (!(eop_desc->cmd_type_offset_bsz &
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+ break;
+
+ /* clear next_to_watch to prevent false hangs */
+ tx_buf->next_to_watch = NULL;
+ tx_desc->buf_addr = 0;
+ tx_desc->cmd_type_offset_bsz = 0;
+
+ /* move past filter desc */
+ tx_buf++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buf = tx_ring->tx_buf;
+ tx_desc = ICE_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap the data header */
+ if (dma_unmap_len(tx_buf, len))
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
+ devm_kfree(tx_ring->dev, tx_buf->raw_buf);
+
+ /* clear the buffer info so the entry can be reused */
+ tx_buf->raw_buf = NULL;
+ tx_buf->tx_flags = 0;
+ tx_buf->next_to_watch = NULL;
+ dma_unmap_len_set(tx_buf, len, 0);
+ tx_desc->buf_addr = 0;
+ tx_desc->cmd_type_offset_bsz = 0;
+
+ /* move past eop_desc for start of next FD desc */
+ tx_buf++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buf = tx_ring->tx_buf;
+ tx_desc = ICE_TX_DESC(tx_ring, 0);
+ }
+
+ budget--;
+ } while (likely(budget));
+
+ i += tx_ring->count;
+ tx_ring->next_to_clean = i;
+
+ /* re-enable interrupt if needed */
+ ice_irq_dynamic_ena(&vsi->back->hw, vsi, vsi->q_vectors[0]);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
new file mode 100644
index 000000000..932b5661e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -0,0 +1,445 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_TXRX_H_
+#define _ICE_TXRX_H_
+
+#include "ice_type.h"
+
+#define ICE_DFLT_IRQ_WORK 256
+#define ICE_RXBUF_3072 3072
+#define ICE_RXBUF_2048 2048
+#define ICE_RXBUF_1536 1536
+#define ICE_MAX_CHAINED_RX_BUFS 5
+#define ICE_MAX_BUF_TXD 8
+#define ICE_MIN_TX_LEN 17
+
+/* The size limit for a transmit buffer in a descriptor is (16K - 1).
+ * In order to align with the read requests we will align the value to
+ * the nearest 4K which represents our maximum read request size.
+ */
+#define ICE_MAX_READ_REQ_SIZE 4096
+#define ICE_MAX_DATA_PER_TXD (16 * 1024 - 1)
+#define ICE_MAX_DATA_PER_TXD_ALIGNED \
+ (~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD)
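+/* i.e. ICE_MAX_DATA_PER_TXD_ALIGNED is 16383 rounded down to 4K: 12288 bytes */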
+
+#define ICE_MAX_TXQ_PER_TXQG 128
+
+/* Attempt to maximize the headroom available for incoming frames. We use a 2K
+ * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame.
+ * This leaves us with 512 bytes of room. From that we need to deduct the
+ * space needed for the shared info and the padding needed to IP align the
+ * frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ * up negative. In these cases we should fall back to the legacy
+ * receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define ICE_2K_TOO_SMALL_WITH_PADDING \
+ ((unsigned int)(NET_SKB_PAD + ICE_RXBUF_1536) > \
+ SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
+
+/**
+ * ice_compute_pad - compute the padding
+ * @rx_buf_len: buffer length
+ *
+ * Figure out the size of half page based on given buffer length and
+ * then subtract the skb_shared_info followed by subtraction of the
+ * actual buffer length; this in turn results in the actual space that
+ * is left for padding usage
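+ *
+ * For example, assuming a 4K page and a ~320-byte aligned skb_shared_info,
+ * rx_buf_len = 1536 gives a 2048-byte half page and leaves roughly
+ * 2048 - 320 - 1536 = 192 bytes for padding.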
+ */
+static inline int ice_compute_pad(int rx_buf_len)
+{
+ int half_page_size;
+
+ half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+ return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len;
+}
+
+/**
+ * ice_skb_pad - determine the padding that we can supply
+ *
+ * Figure out the right Rx buffer size and based on that calculate the
+ * padding
+ */
+static inline int ice_skb_pad(void)
+{
+ int rx_buf_len;
+
+ /* If a 2K buffer cannot handle a standard Ethernet frame then
+ * optimize padding for a 3K buffer instead of a 1.5K buffer.
+ *
+ * For a 3K buffer we need to add enough padding to allow for
+ * tailroom due to NET_IP_ALIGN possibly shifting us out of
+ * cache-line alignment.
+ */
+ if (ICE_2K_TOO_SMALL_WITH_PADDING)
+ rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+ else
+ rx_buf_len = ICE_RXBUF_1536;
+
+ /* if needed make room for NET_IP_ALIGN */
+ rx_buf_len -= NET_IP_ALIGN;
+
+ return ice_compute_pad(rx_buf_len);
+}
+
+#define ICE_SKB_PAD ice_skb_pad()
+#else
+#define ICE_2K_TOO_SMALL_WITH_PADDING false
+#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
+/* We are assuming that the cache line is always 64 Bytes here for ice.
+ * In order to make sure that is a correct assumption there is a check in probe
+ * to print a warning if the read from GLPCI_CNF2 tells us that the cache line
+ * size is 128 bytes. We do it this way because we do not want to read the
+ * GLPCI_CNF2 register or a variable containing the value on every pass through
+ * the Tx path.
+ */
+#define ICE_CACHE_LINE_BYTES 64
+#define ICE_DESCS_PER_CACHE_LINE (ICE_CACHE_LINE_BYTES / \
+ sizeof(struct ice_tx_desc))
+#define ICE_DESCS_FOR_CTX_DESC 1
+#define ICE_DESCS_FOR_SKB_DATA_PTR 1
+/* Tx descriptors needed, worst case */
+#define DESC_NEEDED (MAX_SKB_FRAGS + ICE_DESCS_FOR_CTX_DESC + \
+ ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_SKB_DATA_PTR)
+#define ICE_DESC_UNUSED(R) \
+ (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+ (R)->next_to_clean - (R)->next_to_use - 1)
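+
+/* Worked example for ICE_DESC_UNUSED(): with count = 512, next_to_clean = 10
+ * and next_to_use = 500, ntc <= ntu so count is added back in:
+ * 512 + 10 - 500 - 1 = 21 free descriptors. The "- 1" keeps one slot
+ * reserved so a completely full ring is never mistaken for an empty one.
+ */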
+
+#define ICE_RING_QUARTER(R) ((R)->count >> 2)
+
+#define ICE_TX_FLAGS_TSO BIT(0)
+#define ICE_TX_FLAGS_HW_VLAN BIT(1)
+#define ICE_TX_FLAGS_SW_VLAN BIT(2)
+/* ICE_TX_FLAGS_DUMMY_PKT is used to mark dummy packets that should be
+ * freed instead of returned like skb packets.
+ */
+#define ICE_TX_FLAGS_DUMMY_PKT BIT(3)
+#define ICE_TX_FLAGS_TSYN BIT(4)
+#define ICE_TX_FLAGS_IPV4 BIT(5)
+#define ICE_TX_FLAGS_IPV6 BIT(6)
+#define ICE_TX_FLAGS_TUNNEL BIT(7)
+#define ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN BIT(8)
+#define ICE_TX_FLAGS_VLAN_M 0xffff0000
+#define ICE_TX_FLAGS_VLAN_PR_M 0xe0000000
+#define ICE_TX_FLAGS_VLAN_PR_S 29
+#define ICE_TX_FLAGS_VLAN_S 16
+
+#define ICE_XDP_PASS 0
+#define ICE_XDP_CONSUMED BIT(0)
+#define ICE_XDP_TX BIT(1)
+#define ICE_XDP_REDIR BIT(2)
+#define ICE_XDP_EXIT BIT(3)
+
+#define ICE_RX_DMA_ATTR \
+ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+#define ICE_ETH_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
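+
+/* For reference: ETH_HLEN (14) + ETH_FCS_LEN (4) + 2 * VLAN_HLEN (2 * 4)
+ * adds up to 26 bytes of worst-case L2 overhead on top of the MTU.
+ */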
+
+#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
+
+struct ice_tx_buf {
+ struct ice_tx_desc *next_to_watch;
+ union {
+ struct sk_buff *skb;
+ void *raw_buf; /* used for XDP */
+ };
+ unsigned int bytecount;
+ unsigned short gso_segs;
+ u32 tx_flags;
+ DEFINE_DMA_UNMAP_LEN(len);
+ DEFINE_DMA_UNMAP_ADDR(dma);
+};
+
+struct ice_tx_offload_params {
+ u64 cd_qw1;
+ struct ice_tx_ring *tx_ring;
+ u32 td_cmd;
+ u32 td_offset;
+ u32 td_l2tag1;
+ u32 cd_tunnel_params;
+ u16 cd_l2tag2;
+ u8 header_len;
+};
+
+struct ice_rx_buf {
+ dma_addr_t dma;
+ struct page *page;
+ unsigned int page_offset;
+ u16 pagecnt_bias;
+};
+
+struct ice_q_stats {
+ u64 pkts;
+ u64 bytes;
+};
+
+struct ice_txq_stats {
+ u64 restart_q;
+ u64 tx_busy;
+ u64 tx_linearize;
+ int prev_pkt; /* negative if no pending Tx descriptors */
+};
+
+struct ice_rxq_stats {
+ u64 non_eop_descs;
+ u64 alloc_page_failed;
+ u64 alloc_buf_failed;
+};
+
+enum ice_ring_state_t {
+ ICE_TX_XPS_INIT_DONE,
+ ICE_TX_NBITS,
+};
+
+/* This enum matches hardware bits and is meant to be used by DYN_CTLN
+ * registers and QINT registers, or more generally anywhere in the manual
+ * that mentions ITR_INDX. ITR_NONE cannot be used as an index 'n' into any
+ * register; instead it is a special value meaning "don't update" ITR0/1/2.
+ */
+enum ice_dyn_idx_t {
+ ICE_IDX_ITR0 = 0,
+ ICE_IDX_ITR1 = 1,
+ ICE_IDX_ITR2 = 2,
+ ICE_ITR_NONE = 3 /* ITR_NONE must not be used as an index */
+};
+
+/* Header split modes defined by DTYPE field of Rx RLAN context */
+enum ice_rx_dtype {
+ ICE_RX_DTYPE_NO_SPLIT = 0,
+ ICE_RX_DTYPE_HEADER_SPLIT = 1,
+ ICE_RX_DTYPE_SPLIT_ALWAYS = 2,
+};
+
+/* indices into GLINT_ITR registers */
+#define ICE_RX_ITR ICE_IDX_ITR0
+#define ICE_TX_ITR ICE_IDX_ITR1
+#define ICE_ITR_8K 124
+#define ICE_ITR_20K 50
+#define ICE_ITR_MAX 8160 /* 0x1FE0 */
+#define ICE_DFLT_TX_ITR ICE_ITR_20K
+#define ICE_DFLT_RX_ITR ICE_ITR_20K
+enum ice_dynamic_itr {
+ ITR_STATIC = 0,
+ ITR_DYNAMIC = 1
+};
+
+#define ITR_IS_DYNAMIC(rc) ((rc)->itr_mode == ITR_DYNAMIC)
+#define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */
+#define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S)
+#define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */
+#define ITR_REG_ALIGN(setting) ((setting) & ICE_ITR_MASK)
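+
+/* Example: ITR intervals are programmed in 2 us units, so bit zero of a
+ * microsecond value is ignored; ITR_REG_ALIGN(51) yields 50 us, which is
+ * roughly 20K interrupts/sec (the ICE_DFLT_TX_ITR/ICE_DFLT_RX_ITR default).
+ */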
+
+#define ICE_DFLT_INTRL 0
+#define ICE_MAX_INTRL 236
+
+#define ICE_IN_WB_ON_ITR_MODE 255
+/* Sets WB_ON_ITR and assumes INTENA bit is already cleared, which allows
+ * setting the MSK_M bit to tell hardware to ignore the INTENA_M bit. Also,
+ * set the write-back latency to the usecs passed in.
+ */
+#define ICE_GLINT_DYN_CTL_WB_ON_ITR(usecs, itr_idx) \
+ ((((usecs) << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)) & \
+ GLINT_DYN_CTL_INTERVAL_M) | \
+ (((itr_idx) << GLINT_DYN_CTL_ITR_INDX_S) & \
+ GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | \
+ GLINT_DYN_CTL_WB_ON_ITR_M)
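+
+/* Plausible use, for illustration only:
+ * ICE_GLINT_DYN_CTL_WB_ON_ITR(0, ICE_ITR_NONE) requests write-back with
+ * zero latency and no ITR update, since ICE_ITR_NONE selects the
+ * "don't update" ITR index while INTENA_MSK_M keeps the (already
+ * cleared) INTENA bit ignored.
+ */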
+
+/* Legacy or Advanced Mode Queue */
+#define ICE_TX_ADVANCED 0
+#define ICE_TX_LEGACY 1
+
+/* descriptor ring, associated with a VSI */
+struct ice_rx_ring {
+ /* CL1 - 1st cacheline starts here */
+ struct ice_rx_ring *next; /* pointer to next ring in q_vector */
+ void *desc; /* Descriptor ring memory */
+ struct device *dev; /* Used for DMA mapping */
+ struct net_device *netdev; /* netdev ring maps to */
+ struct ice_vsi *vsi; /* Backreference to associated VSI */
+ struct ice_q_vector *q_vector; /* Backreference to associated vector */
+ u8 __iomem *tail;
+ union {
+ struct ice_rx_buf *rx_buf;
+ struct xdp_buff **xdp_buf;
+ };
+ /* CL2 - 2nd cacheline starts here */
+ struct xdp_rxq_info xdp_rxq;
+ /* CL3 - 3rd cacheline starts here */
+ u16 q_index; /* Queue number of ring */
+
+ u16 count; /* Number of descriptors */
+ u16 reg_idx; /* HW register index of the ring */
+
+ /* used in interrupt processing */
+ u16 next_to_use;
+ u16 next_to_clean;
+ u16 next_to_alloc;
+ u16 rx_offset;
+ u16 rx_buf_len;
+
+ /* stats structs */
+ struct ice_rxq_stats rx_stats;
+ struct ice_q_stats stats;
+ struct u64_stats_sync syncp;
+
+ struct rcu_head rcu; /* to avoid race on free */
+	/* CL4 - 4th cacheline starts here */
+ struct ice_channel *ch;
+ struct bpf_prog *xdp_prog;
+ struct ice_tx_ring *xdp_ring;
+ struct xsk_buff_pool *xsk_pool;
+ struct sk_buff *skb;
+ dma_addr_t dma; /* physical address of ring */
+ u64 cached_phctime;
+ u8 dcb_tc; /* Traffic class of ring */
+ u8 ptp_rx;
+#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1)
+#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2)
+ u8 flags;
+} ____cacheline_internodealigned_in_smp;
+
+struct ice_tx_ring {
+ /* CL1 - 1st cacheline starts here */
+ struct ice_tx_ring *next; /* pointer to next ring in q_vector */
+ void *desc; /* Descriptor ring memory */
+ struct device *dev; /* Used for DMA mapping */
+ u8 __iomem *tail;
+ struct ice_tx_buf *tx_buf;
+ struct ice_q_vector *q_vector; /* Backreference to associated vector */
+ struct net_device *netdev; /* netdev ring maps to */
+ struct ice_vsi *vsi; /* Backreference to associated VSI */
+ /* CL2 - 2nd cacheline starts here */
+ dma_addr_t dma; /* physical address of ring */
+ struct xsk_buff_pool *xsk_pool;
+ u16 next_to_use;
+ u16 next_to_clean;
+ u16 next_rs;
+ u16 next_dd;
+ u16 q_handle; /* Queue handle per TC */
+ u16 reg_idx; /* HW register index of the ring */
+ u16 count; /* Number of descriptors */
+ u16 q_index; /* Queue number of ring */
+ /* stats structs */
+ struct ice_txq_stats tx_stats;
+ /* CL3 - 3rd cacheline starts here */
+ struct ice_q_stats stats;
+ struct u64_stats_sync syncp;
+ struct rcu_head rcu; /* to avoid race on free */
+ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */
+ struct ice_channel *ch;
+ struct ice_ptp_tx *tx_tstamps;
+ spinlock_t tx_lock;
+ u32 txq_teid; /* Added Tx queue TEID */
+ /* CL4 - 4th cacheline starts here */
+ u16 xdp_tx_active;
+#define ICE_TX_FLAGS_RING_XDP BIT(0)
+#define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1)
+#define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2)
+ u8 flags;
+ u8 dcb_tc; /* Traffic class of ring */
+ u8 ptp_tx;
+} ____cacheline_internodealigned_in_smp;
+
+static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring)
+{
+ return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB);
+}
+
+static inline void ice_set_ring_build_skb_ena(struct ice_rx_ring *ring)
+{
+ ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline void ice_clear_ring_build_skb_ena(struct ice_rx_ring *ring)
+{
+ ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring)
+{
+ return !!ring->ch;
+}
+
+static inline bool ice_ring_is_xdp(struct ice_tx_ring *ring)
+{
+ return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
+}
+
+enum ice_container_type {
+ ICE_RX_CONTAINER,
+ ICE_TX_CONTAINER,
+};
+
+struct ice_ring_container {
+ /* head of linked-list of rings */
+ union {
+ struct ice_rx_ring *rx_ring;
+ struct ice_tx_ring *tx_ring;
+ };
+ struct dim dim; /* data for net_dim algorithm */
+ u16 itr_idx; /* index in the interrupt vector */
+ /* this matches the maximum number of ITR bits, but in usec
+ * values, so it is shifted left one bit (bit zero is ignored)
+ */
+ union {
+ struct {
+ u16 itr_setting:13;
+ u16 itr_reserved:2;
+ u16 itr_mode:1;
+ };
+ u16 itr_settings;
+ };
+ enum ice_container_type type;
+};
+
+struct ice_coalesce_stored {
+ u16 itr_tx;
+ u16 itr_rx;
+ u8 intrl;
+ u8 tx_valid;
+ u8 rx_valid;
+};
+
+/* iterator for handling rings in ring container */
+#define ice_for_each_rx_ring(pos, head) \
+ for (pos = (head).rx_ring; pos; pos = pos->next)
+
+#define ice_for_each_tx_ring(pos, head) \
+ for (pos = (head).tx_ring; pos; pos = pos->next)
+
+static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring->rx_buf_len > (PAGE_SIZE / 2))
+ return 1;
+#endif
+ return 0;
+}
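+
+/* e.g. with 4K pages a 3072-byte Rx buffer no longer fits in half a page,
+ * so order-1 (8K) pages are used and two buffers still share each page
+ */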
+
+#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
+
+union ice_32b_rx_flex_desc;
+
+bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, u16 cleaned_count);
+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+u16
+ice_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev);
+void ice_clean_tx_ring(struct ice_tx_ring *tx_ring);
+void ice_clean_rx_ring(struct ice_rx_ring *rx_ring);
+int ice_setup_tx_ring(struct ice_tx_ring *tx_ring);
+int ice_setup_rx_ring(struct ice_rx_ring *rx_ring);
+void ice_free_tx_ring(struct ice_tx_ring *tx_ring);
+void ice_free_rx_ring(struct ice_rx_ring *rx_ring);
+int ice_napi_poll(struct napi_struct *napi, int budget);
+int
+ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
+ u8 *raw_packet);
+int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget);
+void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring);
+#endif /* _ICE_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
new file mode 100644
index 000000000..7ee38d02d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -0,0 +1,369 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/filter.h>
+
+#include "ice_txrx_lib.h"
+#include "ice_eswitch.h"
+#include "ice_lib.h"
+
+/**
+ * ice_release_rx_desc - store the new tail value and update the HW tail register
+ * @rx_ring: ring to bump
+ * @val: new next_to_use/next_to_alloc value
+ */
+void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val)
+{
+ u16 prev_ntu = rx_ring->next_to_use & ~0x7;
+
+ rx_ring->next_to_use = val;
+
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = val;
+
+ /* QRX_TAIL will be updated with any tail value, but hardware ignores
+ * the lower 3 bits. This makes it so we only bump tail on meaningful
+ * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+ * the budget depending on the current traffic load.
+ */
+ val &= ~0x7;
+ if (prev_ntu != val) {
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(val, rx_ring->tail);
+ }
+}
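+
+/* Illustration of the 8-descriptor granularity above: moving next_to_use
+ * from 100 to 103 skips the tail write (both round down to 96), while
+ * moving it from 100 to 104 crosses a boundary and bumps tail to 104.
+ */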
+
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns the appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be
+ * used by skb_set_hash, based on the PTYPE parsed by the HW Rx pipeline,
+ * which is part of the Rx descriptor.
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u16 ptype)
+{
+ struct ice_rx_ptype_decoded decoded = ice_decode_rx_desc_ptype(ptype);
+
+ if (!decoded.known)
+ return PKT_HASH_TYPE_NONE;
+ if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4)
+ return PKT_HASH_TYPE_L4;
+ if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3)
+ return PKT_HASH_TYPE_L3;
+ if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2)
+ return PKT_HASH_TYPE_L2;
+
+ return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ */
+static void
+ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u16 rx_ptype)
+{
+ struct ice_32b_rx_flex_desc_nic *nic_mdid;
+ u32 hash;
+
+ if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+ return;
+
+ if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+ return;
+
+ nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+ hash = le32_to_cpu(nic_mdid->rss_hash);
+ skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @ring: the ring we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void
+ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb,
+ union ice_32b_rx_flex_desc *rx_desc, u16 ptype)
+{
+ struct ice_rx_ptype_decoded decoded;
+ u16 rx_status0, rx_status1;
+ bool ipv4, ipv6;
+
+ rx_status0 = le16_to_cpu(rx_desc->wb.status_error0);
+ rx_status1 = le16_to_cpu(rx_desc->wb.status_error1);
+
+ decoded = ice_decode_rx_desc_ptype(ptype);
+
+ /* Start with CHECKSUM_NONE and by default csum_level = 0 */
+ skb->ip_summed = CHECKSUM_NONE;
+ skb_checksum_none_assert(skb);
+
+ /* check if Rx checksum is enabled */
+ if (!(ring->netdev->features & NETIF_F_RXCSUM))
+ return;
+
+ /* check if HW has decoded the packet and checksum */
+ if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+ return;
+
+ if (!(decoded.known && decoded.outer_ip))
+ return;
+
+ ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+ ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+ if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+ BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+ goto checksum_fail;
+
+ if (ipv6 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+ goto checksum_fail;
+
+ /* check for L4 errors and handle packets that were not able to be
+ * checksummed due to arrival speed
+ */
+ if (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+ goto checksum_fail;
+
+ /* check for outer UDP checksum error in tunneled packets */
+ if ((rx_status1 & BIT(ICE_RX_FLEX_DESC_STATUS1_NAT_S)) &&
+ (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
+ goto checksum_fail;
+
+ /* If there is an outer header present that might contain a checksum
+ * we need to bump the checksum level by 1 to reflect the fact that
+ * we are indicating we validated the inner checksum.
+ */
+ if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT)
+ skb->csum_level = 1;
+
+ /* Only report checksum unnecessary for TCP, UDP, or SCTP */
+ switch (decoded.inner_prot) {
+ case ICE_RX_PTYPE_INNER_PROT_TCP:
+ case ICE_RX_PTYPE_INNER_PROT_UDP:
+ case ICE_RX_PTYPE_INNER_PROT_SCTP:
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ default:
+ break;
+ }
+ return;
+
+checksum_fail:
+ ring->vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: Rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+void
+ice_process_skb_fields(struct ice_rx_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u16 ptype)
+{
+ ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+ /* modifies the skb - consumes the enet header */
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+ ice_rx_csum(rx_ring, skb, rx_desc, ptype);
+
+ if (rx_ring->ptp_rx)
+ ice_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: Rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: VLAN tag for packet
+ *
+ * This function sends the completed packet (via skb) up the stack using
+ * GRO receive functions (with/without VLAN tag)
+ */
+void
+ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
+{
+ netdev_features_t features = rx_ring->netdev->features;
+ bool non_zero_vlan = !!(vlan_tag & VLAN_VID_MASK);
+
+ if ((features & NETIF_F_HW_VLAN_CTAG_RX) && non_zero_vlan)
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+ else if ((features & NETIF_F_HW_VLAN_STAG_RX) && non_zero_vlan)
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), vlan_tag);
+
+ napi_gro_receive(&rx_ring->q_vector->napi, skb);
+}
+
+/**
+ * ice_clean_xdp_irq - Reclaim resources after transmit completes on XDP ring
+ * @xdp_ring: XDP ring to clean
+ */
+static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
+{
+ unsigned int total_bytes = 0, total_pkts = 0;
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+ u16 ntc = xdp_ring->next_to_clean;
+ struct ice_tx_desc *next_dd_desc;
+ u16 next_dd = xdp_ring->next_dd;
+ struct ice_tx_buf *tx_buf;
+ int i;
+
+ next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
+ if (!(next_dd_desc->cmd_type_offset_bsz &
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+ return;
+
+ for (i = 0; i < tx_thresh; i++) {
+ tx_buf = &xdp_ring->tx_buf[ntc];
+
+ total_bytes += tx_buf->bytecount;
+		/* normally tx_buf->gso_segs would be used, but at this
+		 * point it is always 1 for us
+		 */
+ total_pkts++;
+
+ page_frag_free(tx_buf->raw_buf);
+ dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+ tx_buf->raw_buf = NULL;
+
+ ntc++;
+ if (ntc >= xdp_ring->count)
+ ntc = 0;
+ }
+
+ next_dd_desc->cmd_type_offset_bsz = 0;
+ xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh;
+ if (xdp_ring->next_dd > xdp_ring->count)
+ xdp_ring->next_dd = tx_thresh - 1;
+ xdp_ring->next_to_clean = ntc;
+ ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes);
+}
+
+/**
+ * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
+ * @data: packet data pointer
+ * @size: packet data size
+ * @xdp_ring: XDP ring for transmission
+ */
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring)
+{
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+ u16 i = xdp_ring->next_to_use;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ dma_addr_t dma;
+
+ if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh)
+ ice_clean_xdp_irq(xdp_ring);
+
+	if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
+ xdp_ring->tx_stats.tx_busy++;
+ return ICE_XDP_CONSUMED;
+ }
+
+ dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(xdp_ring->dev, dma))
+ return ICE_XDP_CONSUMED;
+
+ tx_buf = &xdp_ring->tx_buf[i];
+ tx_buf->bytecount = size;
+ tx_buf->gso_segs = 1;
+ tx_buf->raw_buf = data;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buf, len, size);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
+ tx_desc = ICE_TX_DESC(xdp_ring, i);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0,
+ size, 0);
+
+ xdp_ring->xdp_tx_active++;
+ i++;
+ if (i == xdp_ring->count) {
+ i = 0;
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+ xdp_ring->next_rs = tx_thresh - 1;
+ }
+ xdp_ring->next_to_use = i;
+
+ if (i > xdp_ring->next_rs) {
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+ xdp_ring->next_rs += tx_thresh;
+ }
+
+ return ICE_XDP_TX;
+}
+
+/**
+ * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
+ * @xdp: XDP buffer
+ * @xdp_ring: XDP Tx ring
+ *
+ * Returns negative on failure, 0 on success.
+ */
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring)
+{
+ struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+
+ if (unlikely(!xdpf))
+ return ICE_XDP_CONSUMED;
+
+ return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+}
+
+/**
+ * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @xdp_ring: XDP ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps the XDP Tx tail and/or flushes the redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ */
+void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res)
+{
+ if (xdp_res & ICE_XDP_REDIR)
+ xdp_do_flush_map();
+
+ if (xdp_res & ICE_XDP_TX) {
+ if (static_branch_unlikely(&ice_xdp_locking_key))
+ spin_lock(&xdp_ring->tx_lock);
+ ice_xdp_ring_update_tail(xdp_ring);
+ if (static_branch_unlikely(&ice_xdp_locking_key))
+ spin_unlock(&xdp_ring->tx_lock);
+ }
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
new file mode 100644
index 000000000..c7d2954dc
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_TXRX_LIB_H_
+#define _ICE_TXRX_LIB_H_
+#include "ice.h"
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @status_err_n: Rx descriptor status_error0 or status_error1 bits
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool
+ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits)
+{
+ return !!(status_err_n & cpu_to_le16(stat_err_bits));
+}
+
+static inline __le64
+ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+ return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+ (td_cmd << ICE_TXD_QW1_CMD_S) |
+ (td_offset << ICE_TXD_QW1_OFFSET_S) |
+ ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+ (td_tag << ICE_TXD_QW1_L2TAG1_S));
+}
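+
+/* Minimal usage sketch (values illustrative, not from any real Tx flow):
+ * building the final data descriptor of a 256-byte frame with EOP and RS
+ * set, no offload offsets and no L2 tag:
+ *
+ *	desc->cmd_type_offset_bsz =
+ *		ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, 256, 0);
+ */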
+
+/**
+ * ice_get_vlan_tag_from_rx_desc - get VLAN from Rx flex descriptor
+ * @rx_desc: Rx 32b flex descriptor with RXDID=2
+ *
+ * The OS and current PF implementation only support stripping a single VLAN tag
+ * at a time, so there should only ever be 0 or 1 tags in the l2tag* fields. If
+ * one is found, return the tag; otherwise return 0 to indicate that no VLAN tag
+ * was found.
+ */
+static inline u16
+ice_get_vlan_tag_from_rx_desc(union ice_32b_rx_flex_desc *rx_desc)
+{
+ u16 stat_err_bits;
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+ if (ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
+ return le16_to_cpu(rx_desc->wb.l2tag1);
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S);
+ if (ice_test_staterr(rx_desc->wb.status_error1, stat_err_bits))
+ return le16_to_cpu(rx_desc->wb.l2tag2_2nd);
+
+ return 0;
+}
+
+/**
+ * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ */
+static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring)
+{
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch.
+ */
+ wmb();
+ writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res);
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring);
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring);
+void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val);
+void
+ice_process_skb_fields(struct ice_rx_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u16 ptype);
+void
+ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+#endif /* !_ICE_TXRX_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
new file mode 100644
index 000000000..e1abfcee9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -0,0 +1,1148 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_TYPE_H_
+#define _ICE_TYPE_H_
+
+#define ICE_BYTES_PER_WORD 2
+#define ICE_BYTES_PER_DWORD 4
+#define ICE_CHNL_MAX_TC 16
+
+#include "ice_hw_autogen.h"
+#include "ice_devids.h"
+#include "ice_osdep.h"
+#include "ice_controlq.h"
+#include "ice_lan_tx_rx.h"
+#include "ice_flex_type.h"
+#include "ice_protocol_type.h"
+#include "ice_sbq_cmd.h"
+#include "ice_vlan_mode.h"
+
+static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
+{
+ return test_bit(tc, &bitmap);
+}
+
+static inline u64 round_up_64bit(u64 a, u32 b)
+{
+ return div64_long(((a) + (b) / 2), (b));
+}
+
+static inline u32 ice_round_to_num(u32 N, u32 R)
+{
+ return ((((N) % (R)) < ((R) / 2)) ? (((N) / (R)) * (R)) :
+ ((((N) + (R) - 1) / (R)) * (R)));
+}
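+
+/* Worked examples: ice_round_to_num(1249, 100) rounds down to 1200 because
+ * the remainder (49) is below half of R, while ice_round_to_num(1250, 100)
+ * rounds up to 1300. round_up_64bit() similarly rounds its quotient to the
+ * nearest integer via the "add half, then divide" idiom.
+ */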
+
+/* Driver always calls main vsi_handle first */
+#define ICE_MAIN_VSI_HANDLE 0
+
+/* debug masks - set these bits in hw->debug_mask to control output */
+#define ICE_DBG_INIT BIT_ULL(1)
+#define ICE_DBG_FW_LOG BIT_ULL(3)
+#define ICE_DBG_LINK BIT_ULL(4)
+#define ICE_DBG_PHY BIT_ULL(5)
+#define ICE_DBG_QCTX BIT_ULL(6)
+#define ICE_DBG_NVM BIT_ULL(7)
+#define ICE_DBG_LAN BIT_ULL(8)
+#define ICE_DBG_FLOW BIT_ULL(9)
+#define ICE_DBG_SW BIT_ULL(13)
+#define ICE_DBG_SCHED BIT_ULL(14)
+#define ICE_DBG_RDMA BIT_ULL(15)
+#define ICE_DBG_PKG BIT_ULL(16)
+#define ICE_DBG_RES BIT_ULL(17)
+#define ICE_DBG_PTP BIT_ULL(19)
+#define ICE_DBG_AQ_MSG BIT_ULL(24)
+#define ICE_DBG_AQ_DESC BIT_ULL(25)
+#define ICE_DBG_AQ_DESC_BUF BIT_ULL(26)
+#define ICE_DBG_AQ_CMD BIT_ULL(27)
+#define ICE_DBG_AQ (ICE_DBG_AQ_MSG | \
+ ICE_DBG_AQ_DESC | \
+ ICE_DBG_AQ_DESC_BUF | \
+ ICE_DBG_AQ_CMD)
+
+#define ICE_DBG_USER BIT_ULL(31)
+
+enum ice_aq_res_ids {
+ ICE_NVM_RES_ID = 1,
+ ICE_SPD_RES_ID,
+ ICE_CHANGE_LOCK_RES_ID,
+ ICE_GLOBAL_CFG_LOCK_RES_ID
+};
+
+/* FW update timeout definitions are in milliseconds */
+#define ICE_NVM_TIMEOUT 180000
+#define ICE_CHANGE_LOCK_TIMEOUT 1000
+#define ICE_GLOBAL_CFG_LOCK_TIMEOUT 5000
+
+enum ice_aq_res_access_type {
+ ICE_RES_READ = 1,
+ ICE_RES_WRITE
+};
+
+struct ice_driver_ver {
+ u8 major_ver;
+ u8 minor_ver;
+ u8 build_ver;
+ u8 subbuild_ver;
+ u8 driver_string[32];
+};
+
+enum ice_fc_mode {
+ ICE_FC_NONE = 0,
+ ICE_FC_RX_PAUSE,
+ ICE_FC_TX_PAUSE,
+ ICE_FC_FULL,
+ ICE_FC_PFC,
+ ICE_FC_DFLT
+};
+
+enum ice_phy_cache_mode {
+ ICE_FC_MODE = 0,
+ ICE_SPEED_MODE,
+ ICE_FEC_MODE
+};
+
+enum ice_fec_mode {
+ ICE_FEC_NONE = 0,
+ ICE_FEC_RS,
+ ICE_FEC_BASER,
+ ICE_FEC_AUTO
+};
+
+struct ice_phy_cache_mode_data {
+ union {
+ enum ice_fec_mode curr_user_fec_req;
+ enum ice_fc_mode curr_user_fc_req;
+ u16 curr_user_speed_req;
+ } data;
+};
+
+enum ice_set_fc_aq_failures {
+ ICE_SET_FC_AQ_FAIL_NONE = 0,
+ ICE_SET_FC_AQ_FAIL_GET,
+ ICE_SET_FC_AQ_FAIL_SET,
+ ICE_SET_FC_AQ_FAIL_UPDATE
+};
+
+/* Various MAC types */
+enum ice_mac_type {
+ ICE_MAC_UNKNOWN = 0,
+ ICE_MAC_E810,
+ ICE_MAC_GENERIC,
+};
+
+/* Media Types */
+enum ice_media_type {
+ ICE_MEDIA_UNKNOWN = 0,
+ ICE_MEDIA_FIBER,
+ ICE_MEDIA_BASET,
+ ICE_MEDIA_BACKPLANE,
+ ICE_MEDIA_DA,
+};
+
+enum ice_vsi_type {
+ ICE_VSI_PF = 0,
+ ICE_VSI_VF = 1,
+ ICE_VSI_CTRL = 3, /* equates to ICE_VSI_PF with 1 queue pair */
+ ICE_VSI_CHNL = 4,
+ ICE_VSI_LB = 6,
+ ICE_VSI_SWITCHDEV_CTRL = 7,
+};
+
+struct ice_link_status {
+ /* Refer to ice_aq_phy_type for bits definition */
+ u64 phy_type_low;
+ u64 phy_type_high;
+ u8 topo_media_conflict;
+ u16 max_frame_size;
+ u16 link_speed;
+ u16 req_speeds;
+ u8 link_cfg_err;
+ u8 lse_ena; /* Link Status Event notification */
+ u8 link_info;
+ u8 an_info;
+ u8 ext_info;
+ u8 fec_info;
+ u8 pacing;
+	/* Refer to the module_type[ICE_MODULE_TYPE_TOTAL_BYTE] defines in
+	 * the ice_aqc_get_phy_caps structure
+	 */
+ u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
+};
+
+/* Different reset sources for which a disable queue AQ call has to be made in
+ * order to clean the Tx scheduler as a part of the reset
+ */
+enum ice_disq_rst_src {
+ ICE_NO_RESET = 0,
+ ICE_VM_RESET,
+ ICE_VF_RESET,
+};
+
+/* PHY info such as phy_type, etc... */
+struct ice_phy_info {
+ struct ice_link_status link_info;
+ struct ice_link_status link_info_old;
+ u64 phy_type_low;
+ u64 phy_type_high;
+ enum ice_media_type media_type;
+ u8 get_link_info;
+	/* Please refer to struct ice_aqc_get_link_status_data for details
+	 * of the enable bits in curr_user_speed_req
+	 */
+ u16 curr_user_speed_req;
+ enum ice_fec_mode curr_user_fec_req;
+ enum ice_fc_mode curr_user_fc_req;
+ struct ice_aqc_set_phy_cfg_data curr_user_phy_cfg;
+};
+
+/* protocol enumeration for filters */
+enum ice_fltr_ptype {
+ /* NONE - used for undef/error */
+ ICE_FLTR_PTYPE_NONF_NONE = 0,
+ ICE_FLTR_PTYPE_NONF_IPV4_UDP,
+ ICE_FLTR_PTYPE_NONF_IPV4_TCP,
+ ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
+ ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+ ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP,
+ ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP,
+ ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP,
+ ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER,
+ ICE_FLTR_PTYPE_NONF_IPV6_GTPU_IPV6_OTHER,
+ ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3,
+ ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3,
+ ICE_FLTR_PTYPE_NONF_IPV4_ESP,
+ ICE_FLTR_PTYPE_NONF_IPV6_ESP,
+ ICE_FLTR_PTYPE_NONF_IPV4_AH,
+ ICE_FLTR_PTYPE_NONF_IPV6_AH,
+ ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP,
+ ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP,
+ ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE,
+ ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION,
+ ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE,
+ ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION,
+ ICE_FLTR_PTYPE_NON_IP_L2,
+ ICE_FLTR_PTYPE_FRAG_IPV4,
+ ICE_FLTR_PTYPE_NONF_IPV6_UDP,
+ ICE_FLTR_PTYPE_NONF_IPV6_TCP,
+ ICE_FLTR_PTYPE_NONF_IPV6_SCTP,
+ ICE_FLTR_PTYPE_NONF_IPV6_OTHER,
+ ICE_FLTR_PTYPE_MAX,
+};
+
+enum ice_fd_hw_seg {
+ ICE_FD_HW_SEG_NON_TUN = 0,
+ ICE_FD_HW_SEG_TUN,
+ ICE_FD_HW_SEG_MAX,
+};
+
+/* 1 ICE_VSI_PF + 1 ICE_VSI_CTRL + ICE_CHNL_MAX_TC */
+#define ICE_MAX_FDIR_VSI_PER_FILTER (2 + ICE_CHNL_MAX_TC)
+
+struct ice_fd_hw_prof {
+ struct ice_flow_seg_info *fdir_seg[ICE_FD_HW_SEG_MAX];
+ int cnt;
+ u64 entry_h[ICE_MAX_FDIR_VSI_PER_FILTER][ICE_FD_HW_SEG_MAX];
+ u16 vsi_h[ICE_MAX_FDIR_VSI_PER_FILTER];
+};
+
+/* Common HW capabilities for SW use */
+struct ice_hw_common_caps {
+ u32 valid_functions;
+ /* DCB capabilities */
+ u32 active_tc_bitmap;
+ u32 maxtc;
+
+ /* Tx/Rx queues */
+ u16 num_rxq; /* Number/Total Rx queues */
+ u16 rxq_first_id; /* First queue ID for Rx queues */
+ u16 num_txq; /* Number/Total Tx queues */
+ u16 txq_first_id; /* First queue ID for Tx queues */
+
+ /* MSI-X vectors */
+ u16 num_msix_vectors;
+ u16 msix_vector_first_id;
+
+ /* Max MTU for function or device */
+ u16 max_mtu;
+
+ /* Virtualization support */
+ u8 sr_iov_1_1; /* SR-IOV enabled */
+
+ /* RSS related capabilities */
+ u16 rss_table_size; /* 512 for PFs and 64 for VFs */
+ u8 rss_table_entry_width; /* RSS Entry width in bits */
+
+ u8 dcb;
+ u8 ieee_1588;
+ u8 rdma;
+
+ bool nvm_update_pending_nvm;
+ bool nvm_update_pending_orom;
+ bool nvm_update_pending_netlist;
+#define ICE_NVM_PENDING_NVM_IMAGE BIT(0)
+#define ICE_NVM_PENDING_OROM BIT(1)
+#define ICE_NVM_PENDING_NETLIST BIT(2)
+ bool nvm_unified_update;
+#define ICE_NVM_MGMT_UNIFIED_UPD_SUPPORT BIT(3)
+ /* PCIe reset avoidance */
+ bool pcie_reset_avoidance;
+ /* Post update reset restriction */
+ bool reset_restrict_support;
+};
+
+/* IEEE 1588 TIME_SYNC specific info */
+/* Function specific definitions */
+#define ICE_TS_FUNC_ENA_M BIT(0)
+#define ICE_TS_SRC_TMR_OWND_M BIT(1)
+#define ICE_TS_TMR_ENA_M BIT(2)
+#define ICE_TS_TMR_IDX_OWND_S 4
+#define ICE_TS_TMR_IDX_OWND_M BIT(4)
+#define ICE_TS_CLK_FREQ_S 16
+#define ICE_TS_CLK_FREQ_M ICE_M(0x7, ICE_TS_CLK_FREQ_S)
+#define ICE_TS_CLK_SRC_S 20
+#define ICE_TS_CLK_SRC_M BIT(20)
+#define ICE_TS_TMR_IDX_ASSOC_S 24
+#define ICE_TS_TMR_IDX_ASSOC_M BIT(24)
+
+/* TIME_REF clock rate specification */
+enum ice_time_ref_freq {
+ ICE_TIME_REF_FREQ_25_000 = 0,
+ ICE_TIME_REF_FREQ_122_880 = 1,
+ ICE_TIME_REF_FREQ_125_000 = 2,
+ ICE_TIME_REF_FREQ_153_600 = 3,
+ ICE_TIME_REF_FREQ_156_250 = 4,
+ ICE_TIME_REF_FREQ_245_760 = 5,
+
+ NUM_ICE_TIME_REF_FREQ
+};
+
+/* Clock source specification */
+enum ice_clk_src {
+ ICE_CLK_SRC_TCX0 = 0, /* Temperature compensated oscillator */
+ ICE_CLK_SRC_TIME_REF = 1, /* Use TIME_REF reference clock */
+
+ NUM_ICE_CLK_SRC
+};
+
+struct ice_ts_func_info {
+ /* Function specific info */
+ enum ice_time_ref_freq time_ref;
+ u8 clk_freq;
+ u8 clk_src;
+ u8 tmr_index_assoc;
+ u8 ena;
+ u8 tmr_index_owned;
+ u8 src_tmr_owned;
+ u8 tmr_ena;
+};
+
+/* Device specific definitions */
+#define ICE_TS_TMR0_OWNR_M 0x7
+#define ICE_TS_TMR0_OWND_M BIT(3)
+#define ICE_TS_TMR1_OWNR_S 4
+#define ICE_TS_TMR1_OWNR_M ICE_M(0x7, ICE_TS_TMR1_OWNR_S)
+#define ICE_TS_TMR1_OWND_M BIT(7)
+#define ICE_TS_DEV_ENA_M BIT(24)
+#define ICE_TS_TMR0_ENA_M BIT(25)
+#define ICE_TS_TMR1_ENA_M BIT(26)
+#define ICE_TS_LL_TX_TS_READ_M BIT(28)
+
+struct ice_ts_dev_info {
+ /* Device specific info */
+ u32 ena_ports;
+ u32 tmr_own_map;
+ u32 tmr0_owner;
+ u32 tmr1_owner;
+ u8 tmr0_owned;
+ u8 tmr1_owned;
+ u8 ena;
+ u8 tmr0_ena;
+ u8 tmr1_ena;
+ u8 ts_ll_read;
+};
+
+/* Function specific capabilities */
+struct ice_hw_func_caps {
+ struct ice_hw_common_caps common_cap;
+ u32 num_allocd_vfs; /* Number of allocated VFs */
+ u32 vf_base_id; /* Logical ID of the first VF */
+ u32 guar_num_vsi;
+ u32 fd_fltr_guar; /* Number of filters guaranteed */
+ u32 fd_fltr_best_effort; /* Number of best effort filters */
+ struct ice_ts_func_info ts_func_info;
+};
+
+/* Device wide capabilities */
+struct ice_hw_dev_caps {
+ struct ice_hw_common_caps common_cap;
+ u32 num_vfs_exposed; /* Total number of VFs exposed */
+ u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */
+ u32 num_flow_director_fltr; /* Number of FD filters available */
+ struct ice_ts_dev_info ts_dev_info;
+ u32 num_funcs;
+};
+
+/* MAC info */
+struct ice_mac_info {
+ u8 lan_addr[ETH_ALEN];
+ u8 perm_addr[ETH_ALEN];
+};
+
+/* Reset types used to determine which kind of reset was requested. These
+ * defines match the RESET_TYPE field of the GLGEN_RSTAT register.
+ * ICE_RESET_PFR does not match any RESET_TYPE field in the GLGEN_RSTAT
+ * register because its reset source is different from the other types listed.
+ */
+enum ice_reset_req {
+ ICE_RESET_POR = 0,
+ ICE_RESET_INVAL = 0,
+ ICE_RESET_CORER = 1,
+ ICE_RESET_GLOBR = 2,
+ ICE_RESET_EMPR = 3,
+ ICE_RESET_PFR = 4,
+};
+
+/* Bus parameters */
+struct ice_bus_info {
+ u16 device;
+ u8 func;
+};
+
+/* Flow control (FC) parameters */
+struct ice_fc_info {
+ enum ice_fc_mode current_mode; /* FC mode in effect */
+ enum ice_fc_mode req_mode; /* FC mode requested by caller */
+};
+
+/* Option ROM version information */
+struct ice_orom_info {
+ u8 major; /* Major version of OROM */
+ u8 patch; /* Patch version of OROM */
+ u16 build; /* Build version of OROM */
+};
+
+/* NVM version information */
+struct ice_nvm_info {
+ u32 eetrack;
+ u8 major;
+ u8 minor;
+};
+
+/* netlist version information */
+struct ice_netlist_info {
+ u32 major; /* major high/low */
+ u32 minor; /* minor high/low */
+ u32 type; /* type high/low */
+ u32 rev; /* revision high/low */
+ u32 hash; /* SHA-1 hash word */
+ u16 cust_ver; /* customer version */
+};
+
+/* Enumeration of possible flash banks for the NVM, OROM, and Netlist modules
+ * of the flash image.
+ */
+enum ice_flash_bank {
+ ICE_INVALID_FLASH_BANK,
+ ICE_1ST_FLASH_BANK,
+ ICE_2ND_FLASH_BANK,
+};
+
+/* Enumeration of which flash bank is desired to read from, either the active
+ * bank or the inactive bank. Used to abstract 1st and 2nd bank notion from
+ * code which just wants to read the active or inactive flash bank.
+ */
+enum ice_bank_select {
+ ICE_ACTIVE_FLASH_BANK,
+ ICE_INACTIVE_FLASH_BANK,
+};
+
+/* information for accessing NVM, OROM, and Netlist flash banks */
+struct ice_bank_info {
+ u32 nvm_ptr; /* Pointer to 1st NVM bank */
+ u32 nvm_size; /* Size of NVM bank */
+ u32 orom_ptr; /* Pointer to 1st OROM bank */
+ u32 orom_size; /* Size of OROM bank */
+ u32 netlist_ptr; /* Pointer to 1st Netlist bank */
+ u32 netlist_size; /* Size of Netlist bank */
+ enum ice_flash_bank nvm_bank; /* Active NVM bank */
+ enum ice_flash_bank orom_bank; /* Active OROM bank */
+ enum ice_flash_bank netlist_bank; /* Active Netlist bank */
+};
+
+/* Flash Chip Information */
+struct ice_flash_info {
+ struct ice_orom_info orom; /* Option ROM version info */
+ struct ice_nvm_info nvm; /* NVM version information */
+ struct ice_netlist_info netlist;/* Netlist version info */
+ struct ice_bank_info banks; /* Flash Bank information */
+ u16 sr_words; /* Shadow RAM size in words */
+ u32 flash_size; /* Size of available flash in bytes */
+ u8 blank_nvm_mode; /* is NVM empty (no FW present) */
+};
+
+struct ice_link_default_override_tlv {
+ u8 options;
+#define ICE_LINK_OVERRIDE_OPT_M 0x3F
+#define ICE_LINK_OVERRIDE_STRICT_MODE BIT(0)
+#define ICE_LINK_OVERRIDE_EPCT_DIS BIT(1)
+#define ICE_LINK_OVERRIDE_PORT_DIS BIT(2)
+#define ICE_LINK_OVERRIDE_EN BIT(3)
+#define ICE_LINK_OVERRIDE_AUTO_LINK_DIS BIT(4)
+#define ICE_LINK_OVERRIDE_EEE_EN BIT(5)
+ u8 phy_config;
+#define ICE_LINK_OVERRIDE_PHY_CFG_S 8
+#define ICE_LINK_OVERRIDE_PHY_CFG_M (0xC3 << ICE_LINK_OVERRIDE_PHY_CFG_S)
+#define ICE_LINK_OVERRIDE_PAUSE_M 0x3
+#define ICE_LINK_OVERRIDE_LESM_EN BIT(6)
+#define ICE_LINK_OVERRIDE_AUTO_FEC_EN BIT(7)
+ u8 fec_options;
+#define ICE_LINK_OVERRIDE_FEC_OPT_M 0xFF
+ u8 rsvd1;
+ u64 phy_type_low;
+ u64 phy_type_high;
+};
+
+#define ICE_NVM_VER_LEN 32
+
+/* Max number of port-to-queue branches w.r.t. topology */
+#define ICE_MAX_TRAFFIC_CLASS 8
+#define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS
+
+#define ice_for_each_traffic_class(_i) \
+ for ((_i) = 0; (_i) < ICE_MAX_TRAFFIC_CLASS; (_i)++)
+
+/* ICE_DFLT_AGG_ID means that all new VM/VSI nodes connect to the
+ * driver-defined policy for the default aggregator
+ */
+#define ICE_INVAL_TEID 0xFFFFFFFF
+#define ICE_DFLT_AGG_ID 0
+
+struct ice_sched_node {
+ struct ice_sched_node *parent;
+ struct ice_sched_node *sibling; /* next sibling in the same layer */
+ struct ice_sched_node **children;
+ struct ice_aqc_txsched_elem_data info;
+ u32 agg_id; /* aggregator group ID */
+ u16 vsi_handle;
+ u8 in_use; /* suspended or in use */
+ u8 tx_sched_layer; /* Logical Layer (1-9) */
+ u8 num_children;
+ u8 tc_num;
+ u8 owner;
+#define ICE_SCHED_NODE_OWNER_LAN 0
+#define ICE_SCHED_NODE_OWNER_RDMA 2
+};
+
+/* Access Macros for Tx Sched Elements data */
+#define ICE_TXSCHED_GET_NODE_TEID(x) le32_to_cpu((x)->info.node_teid)
+
+/* The aggregator type determines if identifier is for a VSI group,
+ * aggregator group, aggregator of queues, or queue group.
+ */
+enum ice_agg_type {
+ ICE_AGG_TYPE_UNKNOWN = 0,
+ ICE_AGG_TYPE_VSI,
+ ICE_AGG_TYPE_AGG, /* aggregator */
+ ICE_AGG_TYPE_Q,
+ ICE_AGG_TYPE_QG
+};
+
+/* Rate limit types */
+enum ice_rl_type {
+ ICE_UNKNOWN_BW = 0,
+ ICE_MIN_BW, /* for CIR profile */
+ ICE_MAX_BW, /* for EIR profile */
+ ICE_SHARED_BW /* for shared profile */
+};
+
+#define ICE_SCHED_MIN_BW 500 /* in Kbps */
+#define ICE_SCHED_MAX_BW 100000000 /* in Kbps */
+#define ICE_SCHED_DFLT_BW 0xFFFFFFFF /* unlimited */
+#define ICE_SCHED_DFLT_RL_PROF_ID 0
+#define ICE_SCHED_NO_SHARED_RL_PROF_ID 0xFFFF
+#define ICE_SCHED_DFLT_BW_WT 4
+#define ICE_SCHED_INVAL_PROF_ID 0xFFFF
+#define ICE_SCHED_DFLT_BURST_SIZE (15 * 1024) /* in bytes (15k) */
+
+#define ICE_MAX_PORT_PER_PCI_DEV 8
+
+/* Data structure for saving BW information */
+enum ice_bw_type {
+ ICE_BW_TYPE_PRIO,
+ ICE_BW_TYPE_CIR,
+ ICE_BW_TYPE_CIR_WT,
+ ICE_BW_TYPE_EIR,
+ ICE_BW_TYPE_EIR_WT,
+ ICE_BW_TYPE_SHARED,
+ ICE_BW_TYPE_CNT /* This must be last */
+};
+
+struct ice_bw {
+ u32 bw;
+ u16 bw_alloc;
+};
+
+struct ice_bw_type_info {
+ DECLARE_BITMAP(bw_t_bitmap, ICE_BW_TYPE_CNT);
+ u8 generic;
+ struct ice_bw cir_bw;
+ struct ice_bw eir_bw;
+ u32 shared_bw;
+};
+
+/* VSI queue context structure for given TC */
+struct ice_q_ctx {
+ u16 q_handle;
+ u32 q_teid;
+ /* bw_t_info saves queue BW information */
+ struct ice_bw_type_info bw_t_info;
+};
+
+/* VSI type list entry to locate corresponding VSI/aggregator nodes */
+struct ice_sched_vsi_info {
+ struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
+ struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
+ struct list_head list_entry;
+ u16 max_lanq[ICE_MAX_TRAFFIC_CLASS];
+ u16 max_rdmaq[ICE_MAX_TRAFFIC_CLASS];
+ /* bw_t_info saves VSI BW information */
+ struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS];
+};
+
+/* driver defines the policy */
+struct ice_sched_tx_policy {
+ u16 max_num_vsis;
+ u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS];
+ u8 rdma_ena;
+};
+
+/* CEE or IEEE 802.1Qaz ETS Configuration data */
+struct ice_dcb_ets_cfg {
+ u8 willing;
+ u8 cbs;
+ u8 maxtcs;
+ u8 prio_table[ICE_MAX_TRAFFIC_CLASS];
+ u8 tcbwtable[ICE_MAX_TRAFFIC_CLASS];
+ u8 tsatable[ICE_MAX_TRAFFIC_CLASS];
+};
+
+/* CEE or IEEE 802.1Qaz PFC Configuration data */
+struct ice_dcb_pfc_cfg {
+ u8 willing;
+ u8 mbc;
+ u8 pfccap;
+ u8 pfcena;
+};
+
+/* CEE or IEEE 802.1Qaz Application Priority data */
+struct ice_dcb_app_priority_table {
+ u16 prot_id;
+ u8 priority;
+ u8 selector;
+};
+
+#define ICE_MAX_USER_PRIORITY 8
+#define ICE_DCBX_MAX_APPS 64
+#define ICE_DSCP_NUM_VAL 64
+#define ICE_LLDPDU_SIZE 1500
+#define ICE_TLV_STATUS_OPER 0x1
+#define ICE_TLV_STATUS_SYNC 0x2
+#define ICE_TLV_STATUS_ERR 0x4
+#define ICE_APP_PROT_ID_ISCSI_860 0x035c
+#define ICE_APP_SEL_ETHTYPE 0x1
+#define ICE_APP_SEL_TCPIP 0x2
+#define ICE_CEE_APP_SEL_ETHTYPE 0x0
+#define ICE_SR_LINK_DEFAULT_OVERRIDE_PTR 0x134
+#define ICE_CEE_APP_SEL_TCPIP 0x1
+
+struct ice_dcbx_cfg {
+ u32 numapps;
+ u32 tlv_status; /* CEE mode TLV status */
+ struct ice_dcb_ets_cfg etscfg;
+ struct ice_dcb_ets_cfg etsrec;
+ struct ice_dcb_pfc_cfg pfc;
+#define ICE_QOS_MODE_VLAN 0x0
+#define ICE_QOS_MODE_DSCP 0x1
+ u8 pfc_mode;
+ struct ice_dcb_app_priority_table app[ICE_DCBX_MAX_APPS];
+ /* when DSCP mapping defined by user set its bit to 1 */
+ DECLARE_BITMAP(dscp_mapped, ICE_DSCP_NUM_VAL);
+ /* array holding DSCP -> UP/TC values for DSCP L3 QoS mode */
+ u8 dscp_map[ICE_DSCP_NUM_VAL];
+ u8 dcbx_mode;
+#define ICE_DCBX_MODE_CEE 0x1
+#define ICE_DCBX_MODE_IEEE 0x2
+ u8 app_mode;
+#define ICE_DCBX_APPS_NON_WILLING 0x1
+};
+
+struct ice_qos_cfg {
+ struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */
+ struct ice_dcbx_cfg desired_dcbx_cfg; /* CEE Desired Cfg */
+ struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */
+ u8 dcbx_status : 3; /* see ICE_DCBX_STATUS_DIS */
+ u8 is_sw_lldp : 1;
+};
+
+struct ice_port_info {
+ struct ice_sched_node *root; /* Root Node per Port */
+ struct ice_hw *hw; /* back pointer to HW instance */
+ u32 last_node_teid; /* scheduler last node info */
+ u16 sw_id; /* Initial switch ID belongs to port */
+ u16 pf_vf_num;
+ u8 port_state;
+#define ICE_SCHED_PORT_STATE_INIT 0x0
+#define ICE_SCHED_PORT_STATE_READY 0x1
+ u8 lport;
+#define ICE_LPORT_MASK 0xff
+ struct ice_fc_info fc;
+ struct ice_mac_info mac;
+ struct ice_phy_info phy;
+ struct mutex sched_lock; /* protect access to TXSched tree */
+ struct ice_sched_node *
+ sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM];
+ /* List contain profile ID(s) and other params per layer */
+ struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+ struct ice_qos_cfg qos_cfg;
+ u8 is_vf:1;
+};
+
+struct ice_switch_info {
+ struct list_head vsi_list_map_head;
+ struct ice_sw_recipe *recp_list;
+ u16 prof_res_bm_init;
+ u16 max_used_prof_index;
+
+ DECLARE_BITMAP(prof_res_bm[ICE_MAX_NUM_PROFILES], ICE_MAX_FV_WORDS);
+};
+
+/* FW logging configuration */
+struct ice_fw_log_evnt {
+ u8 cfg : 4; /* New event enables to configure */
+ u8 cur : 4; /* Current/active event enables */
+};
+
+struct ice_fw_log_cfg {
+ u8 cq_en : 1; /* FW logging is enabled via the control queue */
+ u8 uart_en : 1; /* FW logging is enabled via UART for all PFs */
+	u8 actv_evnts; /* Accumulation of currently enabled log events */
+
+#define ICE_FW_LOG_EVNT_INFO (ICE_AQC_FW_LOG_INFO_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_INIT (ICE_AQC_FW_LOG_INIT_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_FLOW (ICE_AQC_FW_LOG_FLOW_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_ERR (ICE_AQC_FW_LOG_ERR_EN >> ICE_AQC_FW_LOG_EN_S)
+ struct ice_fw_log_evnt evnts[ICE_AQC_FW_LOG_ID_MAX];
+};
+
+/* Enum defining the different states of the mailbox snapshot in the
+ * PF-VF mailbox overflow detection algorithm. The snapshot can be in one
+ * of the following states:
+ * 1. ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT - generate a new static snapshot
+ * within the mailbox buffer.
+ * 2. ICE_MAL_VF_DETECT_STATE_TRAVERSE - iterate through the mailbox snapshot.
+ * 3. ICE_MAL_VF_DETECT_STATE_DETECT - track the messages sent per VF via the
+ * mailbox and mark any VFs sending more messages than the threshold limit set.
+ * 4. ICE_MAL_VF_DETECT_STATE_INVALID - Invalid mailbox state set to 0xFFFFFFFF.
+ */
+enum ice_mbx_snapshot_state {
+ ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT = 0,
+ ICE_MAL_VF_DETECT_STATE_TRAVERSE,
+ ICE_MAL_VF_DETECT_STATE_DETECT,
+ ICE_MAL_VF_DETECT_STATE_INVALID = 0xFFFFFFFF,
+};
+
+/* Structure to hold information of the static snapshot and the mailbox
+ * buffer data used to generate and track the snapshot.
+ * 1. state: the state of the mailbox snapshot in the malicious VF
+ * detection state handler ice_mbx_vf_state_handler()
+ * 2. head: head of the mailbox snapshot in a circular mailbox buffer
+ * 3. tail: tail of the mailbox snapshot in a circular mailbox buffer
+ * 4. num_iterations: number of messages traversed in circular mailbox buffer
+ * 5. num_msg_proc: number of messages processed in mailbox
+ * 6. num_pending_arq: number of pending asynchronous messages
+ * 7. max_num_msgs_mbx: maximum messages in mailbox for currently
+ * serviced work item or interrupt.
+ */
+struct ice_mbx_snap_buffer_data {
+ enum ice_mbx_snapshot_state state;
+ u32 head;
+ u32 tail;
+ u32 num_iterations;
+ u16 num_msg_proc;
+ u16 num_pending_arq;
+ u16 max_num_msgs_mbx;
+};
+
+/* Structure to track messages sent by VFs on mailbox:
+ * 1. vf_cntr: a counter array of VFs to track the number of
+ * asynchronous messages sent by each VF
+ * 2. vfcntr_len: number of entries in VF counter array
+ */
+struct ice_mbx_vf_counter {
+ u32 *vf_cntr;
+ u32 vfcntr_len;
+};
+
+/* Structure to hold data relevant to the captured static snapshot
+ * of the PF-VF mailbox.
+ */
+struct ice_mbx_snapshot {
+ struct ice_mbx_snap_buffer_data mbx_buf;
+ struct ice_mbx_vf_counter mbx_vf;
+};
+
+/* Structure to hold data to be used for capturing or updating a
+ * static snapshot.
+ * 1. num_msg_proc: number of messages processed in mailbox
+ * 2. num_pending_arq: number of pending asynchronous messages
+ * 3. max_num_msgs_mbx: maximum messages in mailbox for currently
+ * serviced work item or interrupt.
+ * 4. async_watermark_val: an upper threshold set by the caller to determine
+ * if the pending ARQ count is large enough to assume the possibility of a
+ * malicious VF.
+ */
+struct ice_mbx_data {
+ u16 num_msg_proc;
+ u16 num_pending_arq;
+ u16 max_num_msgs_mbx;
+ u16 async_watermark_val;
+};
+
+/* Port hardware description */
+struct ice_hw {
+ u8 __iomem *hw_addr;
+ void *back;
+ struct ice_aqc_layer_props *layer_info;
+ struct ice_port_info *port_info;
+ /* PSM clock frequency for calculating RL profile params */
+ u32 psm_clk_freq;
+ u64 debug_mask; /* bitmap for debug mask */
+ enum ice_mac_type mac_type;
+
+ u16 fd_ctr_base; /* FD counter base index */
+
+ /* pci info */
+ u16 device_id;
+ u16 vendor_id;
+ u16 subsystem_device_id;
+ u16 subsystem_vendor_id;
+ u8 revision_id;
+
+ u8 pf_id; /* device profile info */
+
+ u16 max_burst_size; /* driver sets this value */
+
+ /* Tx Scheduler values */
+ u8 num_tx_sched_layers;
+ u8 num_tx_sched_phys_layers;
+ u8 flattened_layers;
+ u8 max_cgds;
+ u8 sw_entry_point_layer;
+ u16 max_children[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+ struct list_head agg_list; /* lists all aggregator */
+
+ struct ice_vsi_ctx *vsi_ctx[ICE_MAX_VSI];
+ u8 evb_veb; /* true for VEB, false for VEPA */
+ u8 reset_ongoing; /* true if HW is in reset, false otherwise */
+ struct ice_bus_info bus;
+ struct ice_flash_info flash;
+ struct ice_hw_dev_caps dev_caps; /* device capabilities */
+ struct ice_hw_func_caps func_caps; /* function capabilities */
+
+ struct ice_switch_info *switch_info; /* switch filter lists */
+
+ /* Control Queue info */
+ struct ice_ctl_q_info adminq;
+ struct ice_ctl_q_info sbq;
+ struct ice_ctl_q_info mailboxq;
+
+ u8 api_branch; /* API branch version */
+ u8 api_maj_ver; /* API major version */
+ u8 api_min_ver; /* API minor version */
+ u8 api_patch; /* API patch version */
+ u8 fw_branch; /* firmware branch version */
+ u8 fw_maj_ver; /* firmware major version */
+ u8 fw_min_ver; /* firmware minor version */
+ u8 fw_patch; /* firmware patch version */
+ u32 fw_build; /* firmware build number */
+
+ struct ice_fw_log_cfg fw_log;
+
+/* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL
+ * register. Used for determining the ITR/INTRL granularity during
+ * initialization.
+ */
+#define ICE_MAX_AGG_BW_200G 0x0
+#define ICE_MAX_AGG_BW_100G 0x1
+#define ICE_MAX_AGG_BW_50G 0x2
+#define ICE_MAX_AGG_BW_25G 0x3
+ /* ITR granularity for different speeds */
+#define ICE_ITR_GRAN_ABOVE_25 2
+#define ICE_ITR_GRAN_MAX_25 4
+ /* ITR granularity in 1 us */
+ u8 itr_gran;
+ /* INTRL granularity for different speeds */
+#define ICE_INTRL_GRAN_ABOVE_25 4
+#define ICE_INTRL_GRAN_MAX_25 8
+ /* INTRL granularity in 1 us */
+ u8 intrl_gran;
+
+#define ICE_PHY_PER_NAC 1
+#define ICE_MAX_QUAD 2
+#define ICE_NUM_QUAD_TYPE 2
+#define ICE_PORTS_PER_QUAD 4
+#define ICE_PHY_0_LAST_QUAD 1
+#define ICE_PORTS_PER_PHY 8
+#define ICE_NUM_EXTERNAL_PORTS ICE_PORTS_PER_PHY
+
+ /* Active package version (currently active) */
+ struct ice_pkg_ver active_pkg_ver;
+ u32 active_track_id;
+ u8 active_pkg_name[ICE_PKG_NAME_SIZE];
+ u8 active_pkg_in_nvm;
+
+ /* Driver's package ver - (from the Ice Metadata section) */
+ struct ice_pkg_ver pkg_ver;
+ u8 pkg_name[ICE_PKG_NAME_SIZE];
+
+ /* Driver's Ice segment format version and ID (from the Ice seg) */
+ struct ice_pkg_ver ice_seg_fmt_ver;
+ u8 ice_seg_id[ICE_SEG_ID_SIZE];
+
+ /* Pointer to the ice segment */
+ struct ice_seg *seg;
+
+ /* Pointer to allocated copy of pkg memory */
+ u8 *pkg_copy;
+ u32 pkg_size;
+
+ /* tunneling info */
+ struct mutex tnl_lock;
+ struct ice_tunnel_table tnl;
+
+ struct udp_tunnel_nic_shared udp_tunnel_shared;
+ struct udp_tunnel_nic_info udp_tunnel_nic;
+
+ /* dvm boost update information */
+ struct ice_dvm_table dvm_upd;
+
+ /* HW block tables */
+ struct ice_blk_info blk[ICE_BLK_COUNT];
+ struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */
+ struct list_head fl_profs[ICE_BLK_COUNT];
+
+ /* Flow Director filter info */
+ int fdir_active_fltr;
+
+ struct mutex fdir_fltr_lock; /* protect Flow Director */
+ struct list_head fdir_list_head;
+
+ /* Book-keeping of side-band filter count per flow-type.
+ * This is used to detect and handle input set changes for
+ * respective flow-type.
+ */
+ u16 fdir_fltr_cnt[ICE_FLTR_PTYPE_MAX];
+
+ struct ice_fd_hw_prof **fdir_prof;
+ DECLARE_BITMAP(fdir_perfect_fltr, ICE_FLTR_PTYPE_MAX);
+ struct mutex rss_locks; /* protect RSS configuration */
+ struct list_head rss_list_head;
+ struct ice_mbx_snapshot mbx_snapshot;
+ DECLARE_BITMAP(hw_ptype, ICE_FLOW_PTYPE_MAX);
+ u8 dvm_ena;
+ u16 io_expander_handle;
+};
+
+/* Statistics collected by each port, VSI, VEB, and S-channel */
+struct ice_eth_stats {
+ u64 rx_bytes; /* gorc */
+ u64 rx_unicast; /* uprc */
+ u64 rx_multicast; /* mprc */
+ u64 rx_broadcast; /* bprc */
+ u64 rx_discards; /* rdpc */
+ u64 rx_unknown_protocol; /* rupp */
+ u64 tx_bytes; /* gotc */
+ u64 tx_unicast; /* uptc */
+ u64 tx_multicast; /* mptc */
+ u64 tx_broadcast; /* bptc */
+ u64 tx_discards; /* tdpc */
+ u64 tx_errors; /* tepc */
+};
+
+#define ICE_MAX_UP 8
+
+/* Statistics collected by the MAC */
+struct ice_hw_port_stats {
+ /* eth stats collected by the port */
+ struct ice_eth_stats eth;
+ /* additional port specific stats */
+ u64 tx_dropped_link_down; /* tdold */
+ u64 crc_errors; /* crcerrs */
+ u64 illegal_bytes; /* illerrc */
+ u64 error_bytes; /* errbc */
+ u64 mac_local_faults; /* mlfc */
+ u64 mac_remote_faults; /* mrfc */
+ u64 rx_len_errors; /* rlec */
+ u64 link_xon_rx; /* lxonrxc */
+ u64 link_xoff_rx; /* lxoffrxc */
+ u64 link_xon_tx; /* lxontxc */
+ u64 link_xoff_tx; /* lxofftxc */
+ u64 priority_xon_rx[8]; /* pxonrxc[8] */
+ u64 priority_xoff_rx[8]; /* pxoffrxc[8] */
+ u64 priority_xon_tx[8]; /* pxontxc[8] */
+ u64 priority_xoff_tx[8]; /* pxofftxc[8] */
+ u64 priority_xon_2_xoff[8]; /* pxon2offc[8] */
+ u64 rx_size_64; /* prc64 */
+ u64 rx_size_127; /* prc127 */
+ u64 rx_size_255; /* prc255 */
+ u64 rx_size_511; /* prc511 */
+ u64 rx_size_1023; /* prc1023 */
+ u64 rx_size_1522; /* prc1522 */
+ u64 rx_size_big; /* prc9522 */
+ u64 rx_undersize; /* ruc */
+ u64 rx_fragments; /* rfc */
+ u64 rx_oversize; /* roc */
+ u64 rx_jabber; /* rjc */
+ u64 tx_size_64; /* ptc64 */
+ u64 tx_size_127; /* ptc127 */
+ u64 tx_size_255; /* ptc255 */
+ u64 tx_size_511; /* ptc511 */
+ u64 tx_size_1023; /* ptc1023 */
+ u64 tx_size_1522; /* ptc1522 */
+ u64 tx_size_big; /* ptc9522 */
+ /* flow director stats */
+ u32 fd_sb_status;
+ u64 fd_sb_match;
+};
+
+enum ice_sw_fwd_act_type {
+ ICE_FWD_TO_VSI = 0,
+ ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */
+ ICE_FWD_TO_Q,
+ ICE_FWD_TO_QGRP,
+ ICE_DROP_PACKET,
+ ICE_INVAL_ACT
+};
+
+struct ice_aq_get_set_rss_lut_params {
+ u16 vsi_handle; /* software VSI handle */
+ u16 lut_size; /* size of the LUT buffer */
+ u8 lut_type; /* type of the LUT (i.e. VSI, PF, Global) */
+ u8 *lut; /* input RSS LUT for set and output RSS LUT for get */
+ u8 global_lut_id; /* only valid when lut_type is global */
+};
+
+/* Checksum and Shadow RAM pointers */
+#define ICE_SR_NVM_CTRL_WORD 0x00
+#define ICE_SR_BOOT_CFG_PTR 0x132
+#define ICE_SR_NVM_WOL_CFG 0x19
+#define ICE_NVM_OROM_VER_OFF 0x02
+#define ICE_SR_PBA_BLOCK_PTR 0x16
+#define ICE_SR_NVM_DEV_STARTER_VER 0x18
+#define ICE_SR_NVM_EETRACK_LO 0x2D
+#define ICE_SR_NVM_EETRACK_HI 0x2E
+#define ICE_NVM_VER_LO_SHIFT 0
+#define ICE_NVM_VER_LO_MASK (0xff << ICE_NVM_VER_LO_SHIFT)
+#define ICE_NVM_VER_HI_SHIFT 12
+#define ICE_NVM_VER_HI_MASK (0xf << ICE_NVM_VER_HI_SHIFT)
+#define ICE_OROM_VER_PATCH_SHIFT 0
+#define ICE_OROM_VER_PATCH_MASK (0xff << ICE_OROM_VER_PATCH_SHIFT)
+#define ICE_OROM_VER_BUILD_SHIFT 8
+#define ICE_OROM_VER_BUILD_MASK (0xffff << ICE_OROM_VER_BUILD_SHIFT)
+#define ICE_OROM_VER_SHIFT 24
+#define ICE_OROM_VER_MASK (0xff << ICE_OROM_VER_SHIFT)
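+/* Decoding sketch (field layout per the masks above; the sample word is
+ * made up): an NVM dev starter word of 0x3012 yields major version
+ * (0x3012 & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT = 3 and minor
+ * version (0x3012 & ICE_NVM_VER_LO_MASK) = 0x12.
+ */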
+#define ICE_SR_PFA_PTR 0x40
+#define ICE_SR_1ST_NVM_BANK_PTR 0x42
+#define ICE_SR_NVM_BANK_SIZE 0x43
+#define ICE_SR_1ST_OROM_BANK_PTR 0x44
+#define ICE_SR_OROM_BANK_SIZE 0x45
+#define ICE_SR_NETLIST_BANK_PTR 0x46
+#define ICE_SR_NETLIST_BANK_SIZE 0x47
+#define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800
+
+/* CSS Header words */
+#define ICE_NVM_CSS_SREV_L 0x14
+#define ICE_NVM_CSS_SREV_H 0x15
+
+/* Length of CSS header section in words */
+#define ICE_CSS_HEADER_LENGTH 330
+
+/* Offset of Shadow RAM copy in the NVM bank area. */
+#define ICE_NVM_SR_COPY_WORD_OFFSET roundup(ICE_CSS_HEADER_LENGTH, 32)
+
+/* Size in bytes of Option ROM trailer */
+#define ICE_NVM_OROM_TRAILER_LENGTH (2 * ICE_CSS_HEADER_LENGTH)
+
+/* The Link Topology Netlist section is stored as a series of words. It is
+ * stored in the NVM as a TLV, with the first two words containing the type
+ * and length.
+ */
+#define ICE_NETLIST_LINK_TOPO_MOD_ID 0x011B
+#define ICE_NETLIST_TYPE_OFFSET 0x0000
+#define ICE_NETLIST_LEN_OFFSET 0x0001
+
+/* The Link Topology section follows the TLV header. When reading the netlist
+ * using ice_read_netlist_module, we need to account for the 2-word TLV
+ * header.
+ */
+#define ICE_NETLIST_LINK_TOPO_OFFSET(n) ((n) + 2)
+
+#define ICE_LINK_TOPO_MODULE_LEN ICE_NETLIST_LINK_TOPO_OFFSET(0x0000)
+#define ICE_LINK_TOPO_NODE_COUNT ICE_NETLIST_LINK_TOPO_OFFSET(0x0001)
+
+#define ICE_LINK_TOPO_NODE_COUNT_M ICE_M(0x3FF, 0)
+
+/* The Netlist ID Block is located after all of the Link Topology nodes. */
+#define ICE_NETLIST_ID_BLK_SIZE 0x30
+#define ICE_NETLIST_ID_BLK_OFFSET(n) ICE_NETLIST_LINK_TOPO_OFFSET(0x0004 + 2 * (n))
+
+/* netlist ID block field offsets (word offsets) */
+#define ICE_NETLIST_ID_BLK_MAJOR_VER_LOW 0x02
+#define ICE_NETLIST_ID_BLK_MAJOR_VER_HIGH 0x03
+#define ICE_NETLIST_ID_BLK_MINOR_VER_LOW 0x04
+#define ICE_NETLIST_ID_BLK_MINOR_VER_HIGH 0x05
+#define ICE_NETLIST_ID_BLK_TYPE_LOW 0x06
+#define ICE_NETLIST_ID_BLK_TYPE_HIGH 0x07
+#define ICE_NETLIST_ID_BLK_REV_LOW 0x08
+#define ICE_NETLIST_ID_BLK_REV_HIGH 0x09
+#define ICE_NETLIST_ID_BLK_SHA_HASH_WORD(n) (0x0A + (n))
+#define ICE_NETLIST_ID_BLK_CUST_VER 0x2F
+
+/* Auxiliary field, mask, and shift definition for Shadow RAM and NVM Flash */
+#define ICE_SR_CTRL_WORD_1_S 0x06
+#define ICE_SR_CTRL_WORD_1_M (0x03 << ICE_SR_CTRL_WORD_1_S)
+#define ICE_SR_CTRL_WORD_VALID 0x1
+#define ICE_SR_CTRL_WORD_OROM_BANK BIT(3)
+#define ICE_SR_CTRL_WORD_NETLIST_BANK BIT(4)
+#define ICE_SR_CTRL_WORD_NVM_BANK BIT(5)
+
+#define ICE_SR_NVM_PTR_4KB_UNITS BIT(15)
+
+/* Link override related */
+#define ICE_SR_PFA_LINK_OVERRIDE_WORDS 10
+#define ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS 4
+#define ICE_SR_PFA_LINK_OVERRIDE_OFFSET 2
+#define ICE_SR_PFA_LINK_OVERRIDE_FEC_OFFSET 1
+#define ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET 2
+#define ICE_FW_API_LINK_OVERRIDE_MAJ 1
+#define ICE_FW_API_LINK_OVERRIDE_MIN 5
+#define ICE_FW_API_LINK_OVERRIDE_PATCH 2
+
+#define ICE_SR_WORDS_IN_1KB 512
+
+/* Hash redirection LUT for VSI - maximum array size */
+#define ICE_VSIQF_HLUT_ARRAY_SIZE ((VSIQF_HLUT_MAX_INDEX + 1) * 4)
+
+/* AQ API version for LLDP_FILTER_CONTROL */
+#define ICE_FW_API_LLDP_FLTR_MAJ 1
+#define ICE_FW_API_LLDP_FLTR_MIN 7
+#define ICE_FW_API_LLDP_FLTR_PATCH 1
+
+/* AQ API version for report default configuration */
+#define ICE_FW_API_REPORT_DFLT_CFG_MAJ 1
+#define ICE_FW_API_REPORT_DFLT_CFG_MIN 7
+#define ICE_FW_API_REPORT_DFLT_CFG_PATCH 3
+
+#endif /* _ICE_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
new file mode 100644
index 000000000..9dbe6e9bb
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -0,0 +1,1143 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022, Intel Corporation. */
+
+#include "ice_vf_lib_private.h"
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_virtchnl_allowlist.h"
+
+/* Public functions which may be accessed by all driver files */
+
+/**
+ * ice_get_vf_by_id - Get pointer to VF by ID
+ * @pf: the PF private structure
+ * @vf_id: the VF ID to locate
+ *
+ * Locate and return a pointer to the VF structure associated with a given ID.
+ * Returns NULL if the ID does not have a valid VF structure associated with
+ * it.
+ *
+ * This function takes a reference to the VF, which must be released by
+ * calling ice_put_vf() once the caller is finished accessing the VF structure
+ * returned.
+ */
+struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id)
+{
+ struct ice_vf *vf;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(pf->vfs.table, vf, entry, vf_id) {
+ if (vf->vf_id == vf_id) {
+ struct ice_vf *found;
+
+ if (kref_get_unless_zero(&vf->refcnt))
+ found = vf;
+ else
+ found = NULL;
+
+ rcu_read_unlock();
+ return found;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+/**
+ * ice_release_vf - Release VF associated with a refcount
+ * @ref: the kref decremented to zero
+ *
+ * Callback function for kref_put to release a VF once its reference count has
+ * hit zero.
+ */
+static void ice_release_vf(struct kref *ref)
+{
+ struct ice_vf *vf = container_of(ref, struct ice_vf, refcnt);
+
+ vf->vf_ops->free(vf);
+}
+
+/**
+ * ice_put_vf - Release a reference to a VF
+ * @vf: the VF structure to decrease reference count on
+ *
+ * Decrease the reference count for a VF, and free the entry if it is no
+ * longer in use.
+ *
+ * This must be called after ice_get_vf_by_id() once the reference to the VF
+ * structure is no longer used. Otherwise, the VF structure will never be
+ * freed.
+ */
+void ice_put_vf(struct ice_vf *vf)
+{
+ kref_put(&vf->refcnt, ice_release_vf);
+}
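+
+/* A minimal usage sketch (illustrative only, not part of the driver) of the
+ * reference counted lookup pattern described above. The error code chosen
+ * here is hypothetical; the point is that every successful
+ * ice_get_vf_by_id() must be paired with an ice_put_vf():
+ *
+ *	struct ice_vf *vf = ice_get_vf_by_id(pf, vf_id);
+ *
+ *	if (!vf)
+ *		return -ENOENT;
+ *
+ *	... safely access the VF structure here ...
+ *
+ *	ice_put_vf(vf);
+ */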
+
+/**
+ * ice_has_vfs - Return true if the PF has any associated VFs
+ * @pf: the PF private structure
+ *
+ * Return whether or not the PF has any allocated VFs.
+ *
+ * Note that this function only guarantees that there are no VFs at the point
+ * of calling it. It does not guarantee that no more VFs will be added.
+ */
+bool ice_has_vfs(struct ice_pf *pf)
+{
+ /* A simple check that the hash table is not empty does not require
+ * the mutex or rcu_read_lock.
+ */
+ return !hash_empty(pf->vfs.table);
+}
+
+/**
+ * ice_get_num_vfs - Get number of allocated VFs
+ * @pf: the PF private structure
+ *
+ * Return the total number of allocated VFs. NOTE: VF IDs are not guaranteed
+ * to be contiguous. Do not assume that a VF ID is guaranteed to be less than
+ * the output of this function.
+ */
+u16 ice_get_num_vfs(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+ u16 num_vfs = 0;
+
+ rcu_read_lock();
+ ice_for_each_vf_rcu(pf, bkt, vf)
+ num_vfs++;
+ rcu_read_unlock();
+
+ return num_vfs;
+}
+
+/**
+ * ice_get_vf_vsi - get VF's VSI based on the stored index
+ * @vf: VF used to get VSI
+ */
+struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf)
+{
+ if (vf->lan_vsi_idx == ICE_NO_VSI)
+ return NULL;
+
+ return vf->pf->vsi[vf->lan_vsi_idx];
+}
+
+/**
+ * ice_is_vf_disabled - check if the VF or its parent PF is disabled
+ * @vf: pointer to the VF info
+ *
+ * If the PF has been disabled, there is no need to reset the VF until the PF
+ * is active again. Similarly, if the VF has been disabled, this means something
+ * else is resetting the VF, so we shouldn't continue.
+ *
+ * Returns true if the caller should consider the VF as disabled whether
+ * because that single VF is explicitly disabled or because the PF is
+ * currently disabled.
+ */
+bool ice_is_vf_disabled(struct ice_vf *vf)
+{
+ struct ice_pf *pf = vf->pf;
+
+ return (test_bit(ICE_VF_DIS, pf->state) ||
+ test_bit(ICE_VF_STATE_DIS, vf->vf_states));
+}
+
+/**
+ * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset
+ * @vf: the VF being reset
+ *
+ * The max poll time is ~800 ms, which is about the maximum time it takes for
+ * a VF to be reset and/or a VF driver to be removed.
+ */
+static void ice_wait_on_vf_reset(struct ice_vf *vf)
+{
+ int i;
+
+ for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) {
+ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+ break;
+ msleep(ICE_MAX_VF_RESET_SLEEP_MS);
+ }
+}
+
+/**
+ * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried
+ * @vf: VF to check if it's ready to be configured/queried
+ *
+ * The purpose of this function is to make sure the VF is not in reset, not
+ * disabled, and initialized so it can be configured and/or queried by a host
+ * administrator.
+ */
+int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
+{
+ ice_wait_on_vf_reset(vf);
+
+ if (ice_is_vf_disabled(vf))
+ return -EINVAL;
+
+ if (ice_check_vf_init(vf))
+ return -EBUSY;
+
+ return 0;
+}
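+
+/* Illustrative sketch (not part of the driver) of how an administrative
+ * configuration path might gate on this check; the configuration step is
+ * hypothetical:
+ *
+ *	int err = ice_check_vf_ready_for_cfg(vf);
+ *
+ *	if (err)
+ *		return err;
+ *
+ *	mutex_lock(&vf->cfg_lock);
+ *	... apply the requested configuration ...
+ *	mutex_unlock(&vf->cfg_lock);
+ */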
+
+/**
+ * ice_trigger_vf_reset - Reset a VF on HW
+ * @vf: pointer to the VF structure
+ * @is_vflr: true if VFLR was issued, false if not
+ * @is_pfr: true if the reset was triggered due to a previous PFR
+ *
+ * Trigger hardware to start a reset for a particular VF. Expects the caller
+ * to wait the proper amount of time to allow hardware to reset the VF before
+ * it cleans up and restores VF functionality.
+ */
+static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
+{
+ /* Inform VF that it is no longer active, as a warning */
+ clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
+
+ /* Disable VF's configuration API during reset. The flag is re-enabled
+ * when it's safe again to access VF's VSI.
+ */
+ clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+
+ /* VF_MBX_ARQLEN and VF_MBX_ATQLEN are cleared by PFR, so the driver
+ * needs to clear them in the case of VFR/VFLR. If this is done for
+ * PFR, it can mess up VF resets because the VF driver may already
+ * have started cleanup by the time we get here.
+ */
+ if (!is_pfr)
+ vf->vf_ops->clear_mbx_register(vf);
+
+ vf->vf_ops->trigger_reset_register(vf, is_vflr);
+}
+
+static void ice_vf_clear_counters(struct ice_vf *vf)
+{
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+ if (vsi)
+ vsi->num_vlan = 0;
+
+ vf->num_mac = 0;
+ memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events));
+ memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events));
+}
+
+/**
+ * ice_vf_pre_vsi_rebuild - tasks to be done prior to VSI rebuild
+ * @vf: VF to perform pre VSI rebuild tasks
+ *
+ * These tasks are items that don't need to be amortized since they are most
+ * likely called in a for loop with all VF(s) in the reset_all_vfs() case.
+ */
+static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf)
+{
+ ice_vf_clear_counters(vf);
+ vf->vf_ops->clear_reset_trigger(vf);
+}
+
+/**
+ * ice_vf_rebuild_vsi - rebuild the VF's VSI
+ * @vf: VF to rebuild the VSI for
+ *
+ * This is only called when all VF(s) are being reset (e.g. PCIe Reset on the
+ * host, PFR, CORER, etc.).
+ */
+static int ice_vf_rebuild_vsi(struct ice_vf *vf)
+{
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+ struct ice_pf *pf = vf->pf;
+
+ if (WARN_ON(!vsi))
+ return -EINVAL;
+
+ if (ice_vsi_rebuild(vsi, true)) {
+ dev_err(ice_pf_to_dev(pf), "failed to rebuild VF %d VSI\n",
+ vf->vf_id);
+ return -EIO;
+ }
+ /* vsi->idx will remain the same in this case so don't update
+ * vf->lan_vsi_idx
+ */
+ vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+ vf->lan_vsi_num = vsi->vsi_num;
+
+ return 0;
+}
+
+/**
+ * ice_is_any_vf_in_unicast_promisc - check if any VF is in unicast promiscuous mode
+ * @pf: PF structure for accessing VF(s)
+ *
+ * Return true if any VF is in unicast promiscuous mode, false otherwise.
+ */
+bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf)
+{
+ bool is_vf_promisc = false;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ rcu_read_lock();
+ ice_for_each_vf_rcu(pf, bkt, vf) {
+ /* found a VF that has promiscuous mode configured */
+ if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) {
+ is_vf_promisc = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return is_vf_promisc;
+}
+
+/**
+ * ice_vf_get_promisc_masks - Calculate masks for promiscuous modes
+ * @vf: the VF pointer
+ * @vsi: the VSI to configure
+ * @ucast_m: promiscuous mask to apply to unicast
+ * @mcast_m: promiscuous mask to apply to multicast
+ *
+ * Decide which mask should be used for the unicast and multicast filters,
+ * based on the presence of VLANs.
+ */
+void
+ice_vf_get_promisc_masks(struct ice_vf *vf, struct ice_vsi *vsi,
+ u8 *ucast_m, u8 *mcast_m)
+{
+ if (ice_vf_is_port_vlan_ena(vf) ||
+ ice_vsi_has_non_zero_vlans(vsi)) {
+ *mcast_m = ICE_MCAST_VLAN_PROMISC_BITS;
+ *ucast_m = ICE_UCAST_VLAN_PROMISC_BITS;
+ } else {
+ *mcast_m = ICE_MCAST_PROMISC_BITS;
+ *ucast_m = ICE_UCAST_PROMISC_BITS;
+ }
+}
+
+/**
+ * ice_vf_clear_all_promisc_modes - Clear promisc/allmulticast on VF VSI
+ * @vf: the VF pointer
+ * @vsi: the VSI to configure
+ *
+ * Clear all promiscuous/allmulticast filters for a VF
+ */
+static int
+ice_vf_clear_all_promisc_modes(struct ice_vf *vf, struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vf->pf;
+ u8 ucast_m, mcast_m;
+ int ret = 0;
+
+ ice_vf_get_promisc_masks(vf, vsi, &ucast_m, &mcast_m);
+ if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) {
+ if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) {
+ if (ice_is_dflt_vsi_in_use(vsi->port_info))
+ ret = ice_clear_dflt_vsi(vsi);
+ } else {
+ ret = ice_vf_clear_vsi_promisc(vf, vsi, ucast_m);
+ }
+
+ if (ret) {
+ dev_err(ice_pf_to_dev(vf->pf), "Disabling promiscuous mode failed\n");
+ } else {
+ clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+ dev_info(ice_pf_to_dev(vf->pf), "Disabling promiscuous mode succeeded\n");
+ }
+ }
+
+ if (test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) {
+ ret = ice_vf_clear_vsi_promisc(vf, vsi, mcast_m);
+ if (ret) {
+ dev_err(ice_pf_to_dev(vf->pf), "Disabling allmulticast mode failed\n");
+ } else {
+ clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+ dev_info(ice_pf_to_dev(vf->pf), "Disabling allmulticast mode succeeded\n");
+ }
+ }
+ return ret;
+}
+
+/**
+ * ice_vf_set_vsi_promisc - Enable promiscuous mode for a VF VSI
+ * @vf: the VF to configure
+ * @vsi: the VF's VSI
+ * @promisc_m: the promiscuous mode to enable
+ */
+int
+ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ int status;
+
+ if (ice_vf_is_port_vlan_ena(vf))
+ status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m,
+ ice_vf_get_port_vlan_id(vf));
+ else if (ice_vsi_has_non_zero_vlans(vsi))
+ status = ice_fltr_set_vlan_vsi_promisc(hw, vsi, promisc_m);
+ else
+ status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m, 0);
+
+ if (status && status != -EEXIST) {
+ dev_err(ice_pf_to_dev(vsi->back), "enable Tx/Rx filter promiscuous mode on VF-%u failed, error: %d\n",
+ vf->vf_id, status);
+ return status;
+ }
+
+ return 0;
+}
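+
+/* Illustrative sketch (not part of the driver) tying this helper to
+ * ice_vf_get_promisc_masks(); error handling is elided:
+ *
+ *	u8 ucast_m, mcast_m;
+ *
+ *	ice_vf_get_promisc_masks(vf, vsi, &ucast_m, &mcast_m);
+ *	ice_vf_set_vsi_promisc(vf, vsi, ucast_m);
+ */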
+
+/**
+ * ice_vf_clear_vsi_promisc - Disable promiscuous mode for a VF VSI
+ * @vf: the VF to configure
+ * @vsi: the VF's VSI
+ * @promisc_m: the promiscuous mode to disable
+ */
+int
+ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ int status;
+
+ if (ice_vf_is_port_vlan_ena(vf))
+ status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m,
+ ice_vf_get_port_vlan_id(vf));
+ else if (ice_vsi_has_non_zero_vlans(vsi))
+ status = ice_fltr_clear_vlan_vsi_promisc(hw, vsi, promisc_m);
+ else
+ status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m, 0);
+
+ if (status && status != -ENOENT) {
+ dev_err(ice_pf_to_dev(vsi->back), "disable Tx/Rx filter promiscuous mode on VF-%u failed, error: %d\n",
+ vf->vf_id, status);
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_reset_all_vfs - reset all allocated VFs in one go
+ * @pf: pointer to the PF structure
+ *
+ * Reset all VFs at once, in response to a PF or other device reset.
+ *
+ * First, tell the hardware to reset each VF, then do all the waiting in one
+ * chunk, and finally finish restoring each VF after the wait. This is useful
+ * during PF routines which need to reset all VFs, as otherwise they would
+ * have to perform these resets in a serialized fashion.
+ */
+void ice_reset_all_vfs(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ /* If we don't have any VFs, then there is nothing to reset */
+ if (!ice_has_vfs(pf))
+ return;
+
+ mutex_lock(&pf->vfs.table_lock);
+
+ /* clear all malicious info if the VFs are getting reset */
+ ice_for_each_vf(pf, bkt, vf)
+ if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs,
+ ICE_MAX_SRIOV_VFS, vf->vf_id))
+ dev_dbg(dev, "failed to clear malicious VF state for VF %u\n",
+ vf->vf_id);
+
+ /* If VFs have been disabled, there is no need to reset */
+ if (test_and_set_bit(ICE_VF_DIS, pf->state)) {
+ mutex_unlock(&pf->vfs.table_lock);
+ return;
+ }
+
+ /* Begin reset on all VFs at once */
+ ice_for_each_vf(pf, bkt, vf)
+ ice_trigger_vf_reset(vf, true, true);
+
+ /* HW requires some time to make sure it can flush the FIFO for a VF
+ * when it resets it. Now that we've triggered all of the VFs, iterate
+ * the table again and wait for each VF to complete.
+ */
+ ice_for_each_vf(pf, bkt, vf) {
+ if (!vf->vf_ops->poll_reset_status(vf)) {
+ /* Display a warning if at least one VF didn't manage
+ * to reset in time, but continue on with the
+ * operation.
+ */
+ dev_warn(dev, "VF %u reset check timeout\n", vf->vf_id);
+ break;
+ }
+ }
+
+ /* free VF resources to begin resetting the VSI state */
+ ice_for_each_vf(pf, bkt, vf) {
+ mutex_lock(&vf->cfg_lock);
+
+ vf->driver_caps = 0;
+ ice_vc_set_default_allowlist(vf);
+
+ ice_vf_fdir_exit(vf);
+ ice_vf_fdir_init(vf);
+ /* clean VF control VSI when resetting VFs since it should be
+ * setup only when VF creates its first FDIR rule.
+ */
+ if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+ ice_vf_ctrl_invalidate_vsi(vf);
+
+ ice_vf_pre_vsi_rebuild(vf);
+ ice_vf_rebuild_vsi(vf);
+ vf->vf_ops->post_vsi_rebuild(vf);
+
+ mutex_unlock(&vf->cfg_lock);
+ }
+
+ if (ice_is_eswitch_mode_switchdev(pf))
+ if (ice_eswitch_rebuild(pf))
+ dev_warn(dev, "eswitch rebuild failed\n");
+
+ ice_flush(hw);
+ clear_bit(ICE_VF_DIS, pf->state);
+
+ mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_notify_vf_reset - Notify VF of a reset event
+ * @vf: pointer to the VF structure
+ */
+static void ice_notify_vf_reset(struct ice_vf *vf)
+{
+ struct ice_hw *hw = &vf->pf->hw;
+ struct virtchnl_pf_event pfe;
+
+	/* Bail out if the VF is disabled, or is neither initialized nor
+	 * active; otherwise proceed with the notification
+ */
+ if ((!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
+ !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) ||
+ test_bit(ICE_VF_STATE_DIS, vf->vf_states))
+ return;
+
+ pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
+ pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
+ ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+ VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe),
+ NULL);
+}
+
+/**
+ * ice_reset_vf - Reset a particular VF
+ * @vf: pointer to the VF structure
+ * @flags: flags controlling behavior of the reset
+ *
+ * Flags:
+ * ICE_VF_RESET_VFLR - Indicates a reset is due to VFLR event
+ * ICE_VF_RESET_NOTIFY - Send VF a notification prior to reset
+ * ICE_VF_RESET_LOCK - Acquire VF cfg_lock before resetting
+ *
+ * Returns 0 if the VF is currently in reset, if resets are disabled, or if
+ * the VF resets successfully. Returns an error code if the VF fails to
+ * rebuild.
+ */
+int ice_reset_vf(struct ice_vf *vf, u32 flags)
+{
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vsi;
+ struct device *dev;
+ struct ice_hw *hw;
+ int err = 0;
+ bool rsd;
+
+ dev = ice_pf_to_dev(pf);
+ hw = &pf->hw;
+
+ if (flags & ICE_VF_RESET_NOTIFY)
+ ice_notify_vf_reset(vf);
+
+ if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
+ dev_dbg(dev, "Trying to reset VF %d, but all VF resets are disabled\n",
+ vf->vf_id);
+ return 0;
+ }
+
+ if (flags & ICE_VF_RESET_LOCK)
+ mutex_lock(&vf->cfg_lock);
+ else
+ lockdep_assert_held(&vf->cfg_lock);
+
+ if (ice_is_vf_disabled(vf)) {
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ dev_dbg(dev, "VF is already removed\n");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
+
+ if (ice_vsi_is_rx_queue_active(vsi))
+ ice_vsi_stop_all_rx_rings(vsi);
+
+ dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
+ vf->vf_id);
+ goto out_unlock;
+ }
+
+ /* Set VF disable bit state here, before triggering reset */
+ set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+ ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false);
+
+ vsi = ice_get_vf_vsi(vf);
+ if (WARN_ON(!vsi)) {
+ err = -EIO;
+ goto out_unlock;
+ }
+
+ ice_dis_vf_qs(vf);
+
+ /* Call Disable LAN Tx queue AQ whether or not queues are
+ * enabled. This is needed for successful completion of VFR.
+ */
+ ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
+ NULL, vf->vf_ops->reset_type, vf->vf_id, NULL);
+
+ /* poll VPGEN_VFRSTAT reg to make sure
+ * that reset is complete
+ */
+ rsd = vf->vf_ops->poll_reset_status(vf);
+
+	/* Display a warning if the VF didn't manage to reset in time, but
+	 * continue on with the operation.
+ */
+ if (!rsd)
+ dev_warn(dev, "VF reset check timeout on VF %d\n", vf->vf_id);
+
+ vf->driver_caps = 0;
+ ice_vc_set_default_allowlist(vf);
+
+ /* disable promiscuous modes in case they were enabled
+ * ignore any error if disabling process failed
+ */
+ ice_vf_clear_all_promisc_modes(vf, vsi);
+
+ ice_eswitch_del_vf_mac_rule(vf);
+
+ ice_vf_fdir_exit(vf);
+ ice_vf_fdir_init(vf);
+ /* clean VF control VSI when resetting VF since it should be setup
+ * only when VF creates its first FDIR rule.
+ */
+ if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+ ice_vf_ctrl_vsi_release(vf);
+
+ ice_vf_pre_vsi_rebuild(vf);
+
+ if (vf->vf_ops->vsi_rebuild(vf)) {
+ dev_err(dev, "Failed to release and setup the VF%u's VSI\n",
+ vf->vf_id);
+ err = -EFAULT;
+ goto out_unlock;
+ }
+
+ vf->vf_ops->post_vsi_rebuild(vf);
+ vsi = ice_get_vf_vsi(vf);
+ if (WARN_ON(!vsi)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ ice_eswitch_update_repr(vsi);
+ ice_eswitch_replay_vf_mac_rule(vf);
+
+ /* if the VF has been reset allow it to come up again */
+ if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs,
+ ICE_MAX_SRIOV_VFS, vf->vf_id))
+ dev_dbg(dev, "failed to clear malicious VF state for VF %u\n",
+ vf->vf_id);
+
+out_unlock:
+ if (flags & ICE_VF_RESET_LOCK)
+ mutex_unlock(&vf->cfg_lock);
+
+ return err;
+}
+
+/**
+ * ice_set_vf_state_qs_dis - Set VF queues state to disabled
+ * @vf: pointer to the VF structure
+ */
+static void ice_set_vf_state_qs_dis(struct ice_vf *vf)
+{
+ /* Clear Rx/Tx enabled queues flag */
+ bitmap_zero(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF);
+ bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+ clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+}
+
+/**
+ * ice_set_vf_state_dis - Set VF state to disabled
+ * @vf: pointer to the VF structure
+ */
+void ice_set_vf_state_dis(struct ice_vf *vf)
+{
+ ice_set_vf_state_qs_dis(vf);
+ vf->vf_ops->clear_reset_state(vf);
+}
+
+/* Private functions only accessed from other virtualization files */
+
+/**
+ * ice_dis_vf_qs - Disable the VF queues
+ * @vf: pointer to the VF structure
+ */
+void ice_dis_vf_qs(struct ice_vf *vf)
+{
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+ if (WARN_ON(!vsi))
+ return;
+
+ ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
+ ice_vsi_stop_all_rx_rings(vsi);
+ ice_set_vf_state_qs_dis(vf);
+}
+
+/**
+ * ice_check_vf_init - helper to check if VF init complete
+ * @vf: the pointer to the VF to check
+ */
+int ice_check_vf_init(struct ice_vf *vf)
+{
+ struct ice_pf *pf = vf->pf;
+
+ if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+ dev_err(ice_pf_to_dev(pf), "VF ID: %u in reset. Try again.\n",
+ vf->vf_id);
+ return -EBUSY;
+ }
+ return 0;
+}
+
+/**
+ * ice_vf_get_port_info - Get the VF's port info structure
+ * @vf: VF used to get the port info structure for
+ */
+struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf)
+{
+ return vf->pf->hw.port_info;
+}
+
+/**
+ * ice_cfg_mac_antispoof - Configure MAC antispoof checking behavior
+ * @vsi: the VSI to configure
+ * @enable: whether to enable or disable the spoof checking
+ *
+ * Configure a VSI to enable (or disable) spoof checking behavior.
+ */
+static int ice_cfg_mac_antispoof(struct ice_vsi *vsi, bool enable)
+{
+ struct ice_vsi_ctx *ctx;
+ int err;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->info.sec_flags = vsi->info.sec_flags;
+ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+
+ if (enable)
+ ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF;
+ else
+ ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF;
+
+ err = ice_update_vsi(&vsi->back->hw, vsi->idx, ctx, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to configure Tx MAC anti-spoof %s for VSI %d, error %d\n",
+ enable ? "ON" : "OFF", vsi->vsi_num, err);
+ else
+ vsi->info.sec_flags = ctx->info.sec_flags;
+
+ kfree(ctx);
+
+ return err;
+}
+
+/**
+ * ice_vsi_ena_spoofchk - enable Tx spoof checking for this VSI
+ * @vsi: VSI to enable Tx spoof checking for
+ */
+static int ice_vsi_ena_spoofchk(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+ int err = 0;
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+	/* Allow a VF with only VLAN 0 to send all tagged traffic */
+ if (vsi->type != ICE_VSI_VF || ice_vsi_has_non_zero_vlans(vsi)) {
+ err = vlan_ops->ena_tx_filtering(vsi);
+ if (err)
+ return err;
+ }
+
+ return ice_cfg_mac_antispoof(vsi, true);
+}
+
+/**
+ * ice_vsi_dis_spoofchk - disable Tx spoof checking for this VSI
+ * @vsi: VSI to disable Tx spoof checking for
+ */
+static int ice_vsi_dis_spoofchk(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+ int err;
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+ err = vlan_ops->dis_tx_filtering(vsi);
+ if (err)
+ return err;
+
+ return ice_cfg_mac_antispoof(vsi, false);
+}
+
+/**
+ * ice_vsi_apply_spoofchk - Apply Tx spoof checking setting to a VSI
+ * @vsi: VSI associated to the VF
+ * @enable: whether to enable or disable the spoof checking
+ */
+int ice_vsi_apply_spoofchk(struct ice_vsi *vsi, bool enable)
+{
+ int err;
+
+ if (enable)
+ err = ice_vsi_ena_spoofchk(vsi);
+ else
+ err = ice_vsi_dis_spoofchk(vsi);
+
+ return err;
+}
+
+/**
+ * ice_is_vf_trusted - check if the VF is trusted
+ * @vf: pointer to the VF info
+ */
+bool ice_is_vf_trusted(struct ice_vf *vf)
+{
+ return test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+}
+
+/**
+ * ice_vf_has_no_qs_ena - check if the VF has any Rx or Tx queues enabled
+ * @vf: the VF to check
+ *
+ * Returns true if the VF has no Rx and no Tx queues enabled and returns false
+ * otherwise
+ */
+bool ice_vf_has_no_qs_ena(struct ice_vf *vf)
+{
+ return (!bitmap_weight(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF) &&
+ !bitmap_weight(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF));
+}
+
+/**
+ * ice_is_vf_link_up - check if the VF's link is up
+ * @vf: VF to check if link is up
+ */
+bool ice_is_vf_link_up(struct ice_vf *vf)
+{
+ struct ice_port_info *pi = ice_vf_get_port_info(vf);
+
+ if (ice_check_vf_init(vf))
+ return false;
+
+ if (ice_vf_has_no_qs_ena(vf))
+ return false;
+ else if (vf->link_forced)
+ return vf->link_up;
+ else
+ return pi->phy.link_info.link_info &
+ ICE_AQ_LINK_UP;
+}
+
+/**
+ * ice_vf_set_host_trust_cfg - set trust setting based on pre-reset value
+ * @vf: VF to configure trust setting for
+ */
+static void ice_vf_set_host_trust_cfg(struct ice_vf *vf)
+{
+ if (vf->trusted)
+ set_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+ else
+ clear_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+}
+
+/**
+ * ice_vf_rebuild_host_mac_cfg - add broadcast and the VF's perm_addr/LAA
+ * @vf: VF to add MAC filters for
+ *
+ * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver
+ * always re-adds a broadcast filter and the VF's perm_addr/LAA after reset.
+ */
+static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+ u8 broadcast[ETH_ALEN];
+ int status;
+
+ if (WARN_ON(!vsi))
+ return -EINVAL;
+
+ if (ice_is_eswitch_mode_switchdev(vf->pf))
+ return 0;
+
+ eth_broadcast_addr(broadcast);
+ status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
+ if (status) {
+ dev_err(dev, "failed to add broadcast MAC filter for VF %u, error %d\n",
+ vf->vf_id, status);
+ return status;
+ }
+
+ vf->num_mac++;
+
+ if (is_valid_ether_addr(vf->hw_lan_addr.addr)) {
+ status = ice_fltr_add_mac(vsi, vf->hw_lan_addr.addr,
+ ICE_FWD_TO_VSI);
+ if (status) {
+ dev_err(dev, "failed to add default unicast MAC filter %pM for VF %u, error %d\n",
+ &vf->hw_lan_addr.addr[0], vf->vf_id,
+ status);
+ return status;
+ }
+ vf->num_mac++;
+
+ ether_addr_copy(vf->dev_lan_addr.addr, vf->hw_lan_addr.addr);
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN
+ * @vf: VF to add VLAN filters for
+ * @vsi: Pointer to VSI
+ *
+ * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver
+ * always re-adds either a VLAN 0 or port VLAN based filter after reset.
+ */
+static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf, struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ int err;
+
+ if (ice_vf_is_port_vlan_ena(vf)) {
+ err = vlan_ops->set_port_vlan(vsi, &vf->port_vlan_info);
+ if (err) {
+ dev_err(dev, "failed to configure port VLAN via VSI parameters for VF %u, error %d\n",
+ vf->vf_id, err);
+ return err;
+ }
+
+ err = vlan_ops->add_vlan(vsi, &vf->port_vlan_info);
+ } else {
+ err = ice_vsi_add_vlan_zero(vsi);
+ }
+
+ if (err) {
+ dev_err(dev, "failed to add VLAN %u filter for VF %u during VF rebuild, error %d\n",
+ ice_vf_is_port_vlan_ena(vf) ?
+ ice_vf_get_port_vlan_id(vf) : 0, vf->vf_id, err);
+ return err;
+ }
+
+ err = vlan_ops->ena_rx_filtering(vsi);
+ if (err)
+ dev_warn(dev, "failed to enable Rx VLAN filtering for VF %d VSI %d during VF rebuild, error %d\n",
+ vf->vf_id, vsi->idx, err);
+
+ return 0;
+}
+
+/**
+ * ice_vf_rebuild_host_tx_rate_cfg - re-apply the Tx rate limiting configuration
+ * @vf: VF to re-apply the configuration for
+ *
+ * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver
+ * needs to re-apply the host configured Tx rate limiting configuration.
+ */
+static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+ int err;
+
+ if (WARN_ON(!vsi))
+ return -EINVAL;
+
+ if (vf->min_tx_rate) {
+ err = ice_set_min_bw_limit(vsi, (u64)vf->min_tx_rate * 1000);
+ if (err) {
+ dev_err(dev, "failed to set min Tx rate to %d Mbps for VF %u, error %d\n",
+ vf->min_tx_rate, vf->vf_id, err);
+ return err;
+ }
+ }
+
+ if (vf->max_tx_rate) {
+ err = ice_set_max_bw_limit(vsi, (u64)vf->max_tx_rate * 1000);
+ if (err) {
+ dev_err(dev, "failed to set max Tx rate to %d Mbps for VF %u, error %d\n",
+ vf->max_tx_rate, vf->vf_id, err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vf_rebuild_aggregator_node_cfg - rebuild aggregator node config
+ * @vsi: Pointer to VSI
+ *
+ * This function moves the VSI into the corresponding scheduler aggregator
+ * node, based on the cached per-VSI "aggregator node info"
+ */
+static void ice_vf_rebuild_aggregator_node_cfg(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ int status;
+
+ if (!vsi->agg_node)
+ return;
+
+ dev = ice_pf_to_dev(pf);
+ if (vsi->agg_node->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) {
+ dev_dbg(dev,
+ "agg_id %u already has reached max_num_vsis %u\n",
+ vsi->agg_node->agg_id, vsi->agg_node->num_vsis);
+ return;
+ }
+
+ status = ice_move_vsi_to_agg(pf->hw.port_info, vsi->agg_node->agg_id,
+ vsi->idx, vsi->tc_cfg.ena_tc);
+ if (status)
+ dev_dbg(dev, "unable to move VSI idx %u into aggregator %u node",
+ vsi->idx, vsi->agg_node->agg_id);
+ else
+ vsi->agg_node->num_vsis++;
+}
+
+/**
+ * ice_vf_rebuild_host_cfg - host admin configuration is persistent across reset
+ * @vf: VF to rebuild host configuration on
+ */
+void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+ if (WARN_ON(!vsi))
+ return;
+
+ ice_vf_set_host_trust_cfg(vf);
+
+ if (ice_vf_rebuild_host_mac_cfg(vf))
+ dev_err(dev, "failed to rebuild default MAC configuration for VF %d\n",
+ vf->vf_id);
+
+ if (ice_vf_rebuild_host_vlan_cfg(vf, vsi))
+ dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n",
+ vf->vf_id);
+
+ if (ice_vf_rebuild_host_tx_rate_cfg(vf))
+ dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n",
+ vf->vf_id);
+
+ if (ice_vsi_apply_spoofchk(vsi, vf->spoofchk))
+ dev_err(dev, "failed to rebuild spoofchk configuration for VF %d\n",
+ vf->vf_id);
+
+ /* rebuild aggregator node config for main VF VSI */
+ ice_vf_rebuild_aggregator_node_cfg(vsi);
+}
+
+/**
+ * ice_vf_ctrl_invalidate_vsi - invalidate ctrl_vsi_idx to remove VSI access
+ * @vf: VF that control VSI is being invalidated on
+ */
+void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf)
+{
+ vf->ctrl_vsi_idx = ICE_NO_VSI;
+}
+
+/**
+ * ice_vf_ctrl_vsi_release - invalidate the VF's control VSI after freeing it
+ * @vf: VF that control VSI is being released on
+ */
+void ice_vf_ctrl_vsi_release(struct ice_vf *vf)
+{
+ ice_vsi_release(vf->pf->vsi[vf->ctrl_vsi_idx]);
+ ice_vf_ctrl_invalidate_vsi(vf);
+}
+
+/**
+ * ice_vf_ctrl_vsi_setup - Set up a VF control VSI
+ * @vf: VF to setup control VSI for
+ *
+ * Returns pointer to the successfully allocated VSI struct on success,
+ * otherwise returns NULL on failure.
+ */
+struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf)
+{
+ struct ice_port_info *pi = ice_vf_get_port_info(vf);
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vsi;
+
+ vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf, NULL);
+ if (!vsi) {
+ dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n");
+ ice_vf_ctrl_invalidate_vsi(vf);
+ }
+
+ return vsi;
+}
+
+/**
+ * ice_vf_invalidate_vsi - invalidate vsi_idx/vsi_num to remove VSI access
+ * @vf: VF to remove access to VSI for
+ */
+void ice_vf_invalidate_vsi(struct ice_vf *vf)
+{
+ vf->lan_vsi_idx = ICE_NO_VSI;
+ vf->lan_vsi_num = ICE_NO_VSI;
+}
+
+/**
+ * ice_vf_set_initialized - VF is ready for VIRTCHNL communication
+ * @vf: VF to set in initialized state
+ *
+ * After this function the VF will be ready to receive/handle the
+ * VIRTCHNL_OP_GET_VF_RESOURCES message
+ */
+void ice_vf_set_initialized(struct ice_vf *vf)
+{
+ ice_set_vf_state_qs_dis(vf);
+ clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+ clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+ clear_bit(ICE_VF_STATE_DIS, vf->vf_states);
+ set_bit(ICE_VF_STATE_INIT, vf->vf_states);
+ memset(&vf->vlan_v2_caps, 0, sizeof(vf->vlan_v2_caps));
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
new file mode 100644
index 000000000..9f7fcd8e5
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_VF_LIB_H_
+#define _ICE_VF_LIB_H_
+
+#include <linux/types.h>
+#include <linux/hashtable.h>
+#include <linux/bitmap.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <net/devlink.h>
+#include <linux/avf/virtchnl.h>
+#include "ice_type.h"
+#include "ice_virtchnl_fdir.h"
+#include "ice_vsi_vlan_ops.h"
+
+#define ICE_MAX_SRIOV_VFS 256
+
+/* VF resource constraints */
+#define ICE_MAX_RSS_QS_PER_VF 16
+
+struct ice_pf;
+struct ice_vf;
+struct ice_virtchnl_ops;
+
+/* VF capabilities */
+enum ice_virtchnl_cap {
+ ICE_VIRTCHNL_VF_CAP_PRIVILEGE = 0,
+};
+
+/* Specific VF states */
+enum ice_vf_states {
+ ICE_VF_STATE_INIT = 0, /* PF is initializing VF */
+ ICE_VF_STATE_ACTIVE, /* VF resources are allocated for use */
+ ICE_VF_STATE_QS_ENA, /* VF queue(s) enabled */
+ ICE_VF_STATE_DIS,
+ ICE_VF_STATE_MC_PROMISC,
+ ICE_VF_STATE_UC_PROMISC,
+ ICE_VF_STATES_NBITS
+};
+
+struct ice_time_mac {
+ unsigned long time_modified;
+ u8 addr[ETH_ALEN];
+};
+
+/* VF MDD events print structure */
+struct ice_mdd_vf_events {
+ u16 count; /* total count of Rx|Tx events */
+	/* event count at the time of the last printed event */
+ u16 last_printed;
+};
+
+/* VF operations */
+struct ice_vf_ops {
+ enum ice_disq_rst_src reset_type;
+ void (*free)(struct ice_vf *vf);
+ void (*clear_reset_state)(struct ice_vf *vf);
+ void (*clear_mbx_register)(struct ice_vf *vf);
+ void (*trigger_reset_register)(struct ice_vf *vf, bool is_vflr);
+ bool (*poll_reset_status)(struct ice_vf *vf);
+ void (*clear_reset_trigger)(struct ice_vf *vf);
+ int (*vsi_rebuild)(struct ice_vf *vf);
+ void (*post_vsi_rebuild)(struct ice_vf *vf);
+};
+
+/* Virtchnl/SR-IOV config info */
+struct ice_vfs {
+ DECLARE_HASHTABLE(table, 8); /* table of VF entries */
+ struct mutex table_lock; /* Lock for protecting the hash table */
+ u16 num_supported; /* max supported VFs on this PF */
+ u16 num_qps_per; /* number of queue pairs per VF */
+ u16 num_msix_per; /* number of MSI-X vectors per VF */
+ unsigned long last_printed_mdd_jiffies; /* MDD message rate limit */
+ DECLARE_BITMAP(malvfs, ICE_MAX_SRIOV_VFS); /* malicious VF indicator */
+};
+
+/* VF information structure */
+struct ice_vf {
+ struct hlist_node entry;
+ struct rcu_head rcu;
+ struct kref refcnt;
+ struct ice_pf *pf;
+
+ /* Used during virtchnl message handling and NDO ops against the VF
+ * that will trigger a VFR
+ */
+ struct mutex cfg_lock;
+
+ u16 vf_id; /* VF ID in the PF space */
+ u16 lan_vsi_idx; /* index into PF struct */
+ u16 ctrl_vsi_idx;
+ struct ice_vf_fdir fdir;
+ /* first vector index of this VF in the PF space */
+ int first_vector_idx;
+ struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */
+ struct virtchnl_version_info vf_ver;
+ u32 driver_caps; /* reported by VF driver */
+ struct virtchnl_ether_addr dev_lan_addr;
+ struct virtchnl_ether_addr hw_lan_addr;
+ struct ice_time_mac legacy_last_added_umac;
+ DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF);
+ DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+ struct ice_vlan port_vlan_info; /* Port VLAN ID, QoS, and TPID */
+ struct virtchnl_vlan_caps vlan_v2_caps;
+ u8 pf_set_mac:1; /* VF MAC address set by VMM admin */
+ u8 trusted:1;
+ u8 spoofchk:1;
+ u8 link_forced:1;
+ u8 link_up:1; /* only valid if VF link is forced */
+ /* VSI indices - actual VSI pointers are maintained in the PF structure
+ * When assigned, these will be non-zero, because VSI 0 is always
+ * the main LAN VSI for the PF.
+ */
+ u16 lan_vsi_num; /* ID as used by firmware */
+ unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */
+ unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */
+ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */
+
+ unsigned long vf_caps; /* VF's adv. capabilities */
+ u8 num_req_qs; /* num of queue pairs requested by VF */
+ u16 num_mac;
+ u16 num_vf_qs; /* num of queue configured per VF */
+ struct ice_mdd_vf_events mdd_rx_events;
+ struct ice_mdd_vf_events mdd_tx_events;
+ DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX);
+
+ struct ice_repr *repr;
+ const struct ice_virtchnl_ops *virtchnl_ops;
+ const struct ice_vf_ops *vf_ops;
+
+ /* devlink port data */
+ struct devlink_port devlink_port;
+};
+
+/* Flags for controlling behavior of ice_reset_vf */
+enum ice_vf_reset_flags {
+ ICE_VF_RESET_VFLR = BIT(0), /* Indicate a VFLR reset */
+ ICE_VF_RESET_NOTIFY = BIT(1), /* Notify VF prior to reset */
+ ICE_VF_RESET_LOCK = BIT(2), /* Acquire the VF cfg_lock */
+};
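+
+/* Illustrative sketch (not part of the driver): the flags form a bitmask and
+ * may be combined. For example, a caller that does not already hold the
+ * cfg_lock and wants the VF notified before the reset might issue:
+ *
+ *	err = ice_reset_vf(vf, ICE_VF_RESET_NOTIFY | ICE_VF_RESET_LOCK);
+ */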
+
+static inline u16 ice_vf_get_port_vlan_id(struct ice_vf *vf)
+{
+ return vf->port_vlan_info.vid;
+}
+
+static inline u8 ice_vf_get_port_vlan_prio(struct ice_vf *vf)
+{
+ return vf->port_vlan_info.prio;
+}
+
+static inline bool ice_vf_is_port_vlan_ena(struct ice_vf *vf)
+{
+ return (ice_vf_get_port_vlan_id(vf) || ice_vf_get_port_vlan_prio(vf));
+}
+
+static inline u16 ice_vf_get_port_vlan_tpid(struct ice_vf *vf)
+{
+ return vf->port_vlan_info.tpid;
+}
+
+/* VF Hash Table access functions
+ *
+ * These functions provide abstraction for interacting with the VF hash table.
+ * In general, direct access to the hash table should be avoided outside of
+ * these functions where possible.
+ *
+ * The VF entries in the hash table are protected by reference counting to
+ * track lifetime of accesses from the table. The ice_get_vf_by_id() function
+ * obtains a reference to the VF structure which must be dropped by using
+ * ice_put_vf().
+ */
+
+/**
+ * ice_for_each_vf - Iterate over each VF entry
+ * @pf: pointer to the PF private structure
+ * @bkt: bucket index used for iteration
+ * @vf: pointer to the VF entry currently being processed in the loop
+ *
+ * The bkt variable is an unsigned integer iterator used to traverse the VF
+ * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is.
+ * Use vf->vf_id to get the id number if needed.
+ *
+ * The caller is expected to be under the table_lock mutex for the entire
+ * loop. Use this iterator if your loop is long or if it might sleep.
+ */
+#define ice_for_each_vf(pf, bkt, vf) \
+ hash_for_each((pf)->vfs.table, (bkt), (vf), entry)
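+
+/* A short sketch (illustrative, not part of the driver) of the locked
+ * iteration pattern this macro expects:
+ *
+ *	struct ice_vf *vf;
+ *	unsigned int bkt;
+ *
+ *	mutex_lock(&pf->vfs.table_lock);
+ *	ice_for_each_vf(pf, bkt, vf) {
+ *		... per-VF work that may sleep ...
+ *	}
+ *	mutex_unlock(&pf->vfs.table_lock);
+ */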
+
+/**
+ * ice_for_each_vf_rcu - Iterate over each VF entry protected by RCU
+ * @pf: pointer to the PF private structure
+ * @bkt: bucket index used for iteration
+ * @vf: pointer to the VF entry currently being processed in the loop
+ *
+ * The bkt variable is an unsigned integer iterator used to traverse the VF
+ * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is.
+ * Use vf->vf_id to get the id number if needed.
+ *
+ * The caller is expected to be under rcu_read_lock() for the entire loop.
+ * Only use this iterator if your loop is short and you can guarantee it does
+ * not sleep.
+ */
+#define ice_for_each_vf_rcu(pf, bkt, vf) \
+ hash_for_each_rcu((pf)->vfs.table, (bkt), (vf), entry)
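+
+/* A short sketch (illustrative, not part of the driver) of the RCU-protected
+ * iteration pattern this macro expects:
+ *
+ *	struct ice_vf *vf;
+ *	unsigned int bkt;
+ *
+ *	rcu_read_lock();
+ *	ice_for_each_vf_rcu(pf, bkt, vf) {
+ *		... short, non-sleeping per-VF work ...
+ *	}
+ *	rcu_read_unlock();
+ */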
+
+#ifdef CONFIG_PCI_IOV
+struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id);
+void ice_put_vf(struct ice_vf *vf);
+bool ice_has_vfs(struct ice_pf *pf);
+u16 ice_get_num_vfs(struct ice_pf *pf);
+struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf);
+bool ice_is_vf_disabled(struct ice_vf *vf);
+int ice_check_vf_ready_for_cfg(struct ice_vf *vf);
+void ice_set_vf_state_dis(struct ice_vf *vf);
+bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf);
+void
+ice_vf_get_promisc_masks(struct ice_vf *vf, struct ice_vsi *vsi,
+ u8 *ucast_m, u8 *mcast_m);
+int
+ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m);
+int
+ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m);
+int ice_reset_vf(struct ice_vf *vf, u32 flags);
+void ice_reset_all_vfs(struct ice_pf *pf);
+#else /* CONFIG_PCI_IOV */
+static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id)
+{
+ return NULL;
+}
+
+static inline void ice_put_vf(struct ice_vf *vf)
+{
+}
+
+static inline bool ice_has_vfs(struct ice_pf *pf)
+{
+ return false;
+}
+
+static inline u16 ice_get_num_vfs(struct ice_pf *pf)
+{
+ return 0;
+}
+
+static inline struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf)
+{
+ return NULL;
+}
+
+static inline bool ice_is_vf_disabled(struct ice_vf *vf)
+{
+ return true;
+}
+
+static inline int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void ice_set_vf_state_dis(struct ice_vf *vf)
+{
+}
+
+static inline bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf)
+{
+ return false;
+}
+
+static inline int
+ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int ice_reset_vf(struct ice_vf *vf, u32 flags)
+{
+ return 0;
+}
+
+static inline void ice_reset_all_vfs(struct ice_pf *pf)
+{
+}
+#endif /* !CONFIG_PCI_IOV */
+
+#endif /* _ICE_VF_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h
new file mode 100644
index 000000000..15887e772
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_VF_LIB_PRIVATE_H_
+#define _ICE_VF_LIB_PRIVATE_H_
+
+#include "ice_vf_lib.h"
+
+/* This header file is for exposing functions in ice_vf_lib.c to other files
+ * which are also conditionally compiled depending on CONFIG_PCI_IOV.
+ * Functions which may be used by other files should be exposed as part of
+ * ice_vf_lib.h
+ *
+ * Functions in this file are exposed only when CONFIG_PCI_IOV is enabled, and
+ * thus this header must not be included by .c files which may be compiled
+ * with CONFIG_PCI_IOV disabled.
+ *
+ * To ensure this, only include this header file directly within .c files that
+ * are conditionally enabled in the "ice-$(CONFIG_PCI_IOV)" block.
+ */
+
+#ifndef CONFIG_PCI_IOV
+#warning "Only include ice_vf_lib_private.h in CONFIG_PCI_IOV virtualization files"
+#endif
+
+void ice_dis_vf_qs(struct ice_vf *vf);
+int ice_check_vf_init(struct ice_vf *vf);
+struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf);
+int ice_vsi_apply_spoofchk(struct ice_vsi *vsi, bool enable);
+bool ice_is_vf_trusted(struct ice_vf *vf);
+bool ice_vf_has_no_qs_ena(struct ice_vf *vf);
+bool ice_is_vf_link_up(struct ice_vf *vf);
+void ice_vf_rebuild_host_cfg(struct ice_vf *vf);
+void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf);
+void ice_vf_ctrl_vsi_release(struct ice_vf *vf);
+struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf);
+void ice_vf_invalidate_vsi(struct ice_vf *vf);
+void ice_vf_set_initialized(struct ice_vf *vf);
+
+#endif /* _ICE_VF_LIB_PRIVATE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c
new file mode 100644
index 000000000..fc8c93fa4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c
@@ -0,0 +1,532 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_vf_mbx.h"
+
+/**
+ * ice_aq_send_msg_to_vf
+ * @hw: pointer to the hardware structure
+ * @vfid: VF ID to send msg
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cd: pointer to command details
+ *
+ * Send a message to the VF driver (0x0802) using the mailbox queue,
+ * sending the message asynchronously via the ice_sq_send_cmd() function.
+ */
+int
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+ u8 *msg, u16 msglen, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_pf_vf_msg *cmd;
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_vf);
+
+ cmd = &desc.params.virt;
+ cmd->id = cpu_to_le32(vfid);
+
+ desc.cookie_high = cpu_to_le32(v_opcode);
+ desc.cookie_low = cpu_to_le32(v_retval);
+
+ if (msglen)
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd);
+}
+
+/**
+ * ice_conv_link_speed_to_virtchnl
+ * @adv_link_support: determines the format of the returned link speed
+ * @link_speed: variable containing the link_speed to be converted
+ *
+ * Convert link speed supported by HW to link speed supported by virtchnl.
+ * If adv_link_support is true, then return link speed in Mbps. Else return
+ * link speed as a VIRTCHNL_LINK_SPEED_* cast to a u32. Note that the caller
+ * needs to cast back to an enum virtchnl_link_speed in the case where
+ * adv_link_support is false, but when adv_link_support is true the caller can
+ * expect the speed in Mbps.
+ */
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
+{
+ u32 speed;
+
+ if (adv_link_support)
+ switch (link_speed) {
+ case ICE_AQ_LINK_SPEED_10MB:
+ speed = ICE_LINK_SPEED_10MBPS;
+ break;
+ case ICE_AQ_LINK_SPEED_100MB:
+ speed = ICE_LINK_SPEED_100MBPS;
+ break;
+ case ICE_AQ_LINK_SPEED_1000MB:
+ speed = ICE_LINK_SPEED_1000MBPS;
+ break;
+ case ICE_AQ_LINK_SPEED_2500MB:
+ speed = ICE_LINK_SPEED_2500MBPS;
+ break;
+ case ICE_AQ_LINK_SPEED_5GB:
+ speed = ICE_LINK_SPEED_5000MBPS;
+ break;
+ case ICE_AQ_LINK_SPEED_10GB:
+ speed = ICE_LINK_SPEED_10000MBPS;
+ break;
+ case ICE_AQ_LINK_SPEED_20GB:
+ speed = ICE_LINK_SPEED_20000MBPS;
+ break;
+ case ICE_AQ_LINK_SPEED_25GB:
+ speed = ICE_LINK_SPEED_25000MBPS;
+ break;
+ case ICE_AQ_LINK_SPEED_40GB:
+ speed = ICE_LINK_SPEED_40000MBPS;
+ break;
+ case ICE_AQ_LINK_SPEED_50GB:
+ speed = ICE_LINK_SPEED_50000MBPS;
+ break;
+ case ICE_AQ_LINK_SPEED_100GB:
+ speed = ICE_LINK_SPEED_100000MBPS;
+ break;
+ default:
+ speed = ICE_LINK_SPEED_UNKNOWN;
+ break;
+ }
+ else
+ /* Virtchnl speeds are not defined for every speed supported in
+ * the hardware. To maintain compatibility with older AVF
+		 * drivers, newer speed values are resolved to the closest
+		 * known virtchnl speed when the speed is reported
+ */
+ switch (link_speed) {
+ case ICE_AQ_LINK_SPEED_10MB:
+ case ICE_AQ_LINK_SPEED_100MB:
+ speed = (u32)VIRTCHNL_LINK_SPEED_100MB;
+ break;
+ case ICE_AQ_LINK_SPEED_1000MB:
+ case ICE_AQ_LINK_SPEED_2500MB:
+ case ICE_AQ_LINK_SPEED_5GB:
+ speed = (u32)VIRTCHNL_LINK_SPEED_1GB;
+ break;
+ case ICE_AQ_LINK_SPEED_10GB:
+ speed = (u32)VIRTCHNL_LINK_SPEED_10GB;
+ break;
+ case ICE_AQ_LINK_SPEED_20GB:
+ speed = (u32)VIRTCHNL_LINK_SPEED_20GB;
+ break;
+ case ICE_AQ_LINK_SPEED_25GB:
+ speed = (u32)VIRTCHNL_LINK_SPEED_25GB;
+ break;
+ case ICE_AQ_LINK_SPEED_40GB:
+ case ICE_AQ_LINK_SPEED_50GB:
+ case ICE_AQ_LINK_SPEED_100GB:
+ speed = (u32)VIRTCHNL_LINK_SPEED_40GB;
+ break;
+ default:
+ speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN;
+ break;
+ }
+
+ return speed;
+}
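+
+/* Illustrative sketch (not part of the driver): with advanced link speed
+ * support the return value is already in Mbps; otherwise it must be cast
+ * back to the legacy enum as noted above:
+ *
+ *	u32 mbps = ice_conv_link_speed_to_virtchnl(true, link_speed);
+ *	enum virtchnl_link_speed legacy = (enum virtchnl_link_speed)
+ *		ice_conv_link_speed_to_virtchnl(false, link_speed);
+ */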
+
+/* The mailbox overflow detection algorithm helps to check if there
+ * is a possibility of a malicious VF transmitting too many MBX messages to the
+ * PF.
+ * 1. The mailbox snapshot structure, ice_mbx_snapshot, is initialized during
+ * driver initialization in ice_init_hw() using ice_mbx_init_snapshot().
+ * The struct ice_mbx_snapshot helps to track and traverse a static window of
+ * messages within the mailbox queue while looking for a malicious VF.
+ *
+ * 2. When the caller starts processing its mailbox queue in response to an
+ * interrupt, the structure ice_mbx_snapshot is expected to be cleared before
+ * the algorithm can be run for the first time for that interrupt. This can be
+ * done via ice_mbx_reset_snapshot().
+ *
+ * 3. For every message read by the caller from the MBX Queue, the caller must
+ * call the detection algorithm's entry function ice_mbx_vf_state_handler().
+ * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is
+ * filled as it is required to be passed to the algorithm.
+ *
+ * 4. Every time a message is read from the MBX queue, a VFId is received which
+ * is passed to the state handler. The boolean output is_malvf of the state
+ * handler ice_mbx_vf_state_handler() serves as an indicator to the caller
+ * whether this VF is malicious or not.
+ *
+ * 5. When a VF is identified to be malicious, the caller can send a message
+ * to the system administrator. The caller can invoke ice_mbx_report_malvf()
+ * to help determine if a malicious VF is to be reported or not. This function
+ * requires the caller to maintain a global bitmap to track all malicious VFs
+ * and pass that to ice_mbx_report_malvf() along with the VFID which was identified
+ * to be malicious by ice_mbx_vf_state_handler().
+ *
+ * 6. The global bitmap maintained by PF can be cleared completely if PF is in
+ * reset or the bit corresponding to a VF can be cleared if that VF is in reset.
+ * When a VF is shut down and brought back up, we assume that the new VF is
+ * not malicious and hence report it again if it is subsequently found to be
+ * malicious.
+ *
+ * 7. The function ice_mbx_reset_snapshot() is called to reset the information
+ * in ice_mbx_snapshot for every new mailbox interrupt handled.
+ *
+ * 8. The memory allocated for variables in ice_mbx_snapshot is de-allocated
+ * when driver is unloaded.
+ */
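+
+/* A condensed sketch (illustrative, not part of the driver) of the caller
+ * flow described above; the surrounding variables are hypothetical:
+ *
+ *	bool is_malvf, report_malvf;
+ *
+ *	(fill mbx_data from the control queue state for this interrupt)
+ *	if (!ice_mbx_vf_state_handler(hw, &mbx_data, vf_id, &is_malvf) &&
+ *	    is_malvf) {
+ *		ice_mbx_report_malvf(hw, pf->vfs.malvfs, ICE_MAX_SRIOV_VFS,
+ *				     vf_id, &report_malvf);
+ *		if (report_malvf)
+ *			dev_warn(dev, "malicious VF %u detected\n", vf_id);
+ *	}
+ */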
+#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M)
+/* The highest unsigned 16-bit value, 0xFFFF, indicates that the max messages
+ * check must be ignored in the algorithm
+ */
+#define ICE_IGNORE_MAX_MSG_CNT 0xFFFF
+
+/**
+ * ice_mbx_traverse - Pass through mailbox snapshot
+ * @hw: pointer to the HW struct
+ * @new_state: new algorithm state
+ *
+ * Traversing the mailbox static snapshot without checking
+ * for malicious VFs.
+ */
+static void
+ice_mbx_traverse(struct ice_hw *hw,
+ enum ice_mbx_snapshot_state *new_state)
+{
+ struct ice_mbx_snap_buffer_data *snap_buf;
+ u32 num_iterations;
+
+ snap_buf = &hw->mbx_snapshot.mbx_buf;
+
+	/* As the mailbox buffer is circular, apply a mask to the
+	 * incremented iteration count.
+ */
+ num_iterations = ICE_RQ_DATA_MASK(++snap_buf->num_iterations);
+
+	/* Check either of the conditions below to exit snapshot traversal:
+	 * Condition-1: the number of iterations in the mailbox equals the
+	 * mailbox head, which indicates that we have reached the end of the
+	 * static snapshot.
+	 * Condition-2: if the maximum number of messages serviced in the
+	 * mailbox for a given interrupt is the highest possible value, there
+	 * is no need to check if the number of messages processed is equal to
+	 * it. If not, check if the number of messages processed is greater
+	 * than or equal to the maximum number of mailbox entries serviced in
+	 * the current work item.
+ */
+ if (num_iterations == snap_buf->head ||
+ (snap_buf->max_num_msgs_mbx < ICE_IGNORE_MAX_MSG_CNT &&
+ ++snap_buf->num_msg_proc >= snap_buf->max_num_msgs_mbx))
+ *new_state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+}
+
+/**
+ * ice_mbx_detect_malvf - Detect malicious VF in snapshot
+ * @hw: pointer to the HW struct
+ * @vf_id: relative virtual function ID
+ * @new_state: new algorithm state
+ * @is_malvf: boolean output to indicate if VF is malicious
+ *
+ * This function tracks the number of asynchronous messages
+ * sent per VF and marks the VF as malicious if it exceeds
+ * the permissible number of messages to send.
+ */
+static int
+ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id,
+ enum ice_mbx_snapshot_state *new_state,
+ bool *is_malvf)
+{
+ struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+
+ if (vf_id >= snap->mbx_vf.vfcntr_len)
+ return -EIO;
+
+ /* increment the message count in the VF array */
+ snap->mbx_vf.vf_cntr[vf_id]++;
+
+ if (snap->mbx_vf.vf_cntr[vf_id] >= ICE_ASYNC_VF_MSG_THRESHOLD)
+ *is_malvf = true;
+
+ /* continue to iterate through the mailbox snapshot */
+ ice_mbx_traverse(hw, new_state);
+
+ return 0;
+}
+
+/**
+ * ice_mbx_reset_snapshot - Reset mailbox snapshot structure
+ * @snap: pointer to mailbox snapshot structure in the ice_hw struct
+ *
+ * Reset the mailbox snapshot structure and clear VF counter array.
+ */
+static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap)
+{
+ u32 vfcntr_len;
+
+ if (!snap || !snap->mbx_vf.vf_cntr)
+ return;
+
+ /* Clear VF counters. */
+ vfcntr_len = snap->mbx_vf.vfcntr_len;
+ if (vfcntr_len)
+ memset(snap->mbx_vf.vf_cntr, 0,
+ (vfcntr_len * sizeof(*snap->mbx_vf.vf_cntr)));
+
+ /* Reset mailbox snapshot for a new capture. */
+ memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
+ snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+}
+
+/**
+ * ice_mbx_vf_state_handler - Handle states of the overflow algorithm
+ * @hw: pointer to the HW struct
+ * @mbx_data: pointer to structure containing mailbox data
+ * @vf_id: relative virtual function (VF) ID
+ * @is_malvf: boolean output to indicate if VF is malicious
+ *
+ * The function serves as an entry point for the malicious VF
+ * detection algorithm by handling the different states and state
+ * transitions of the algorithm:
+ * New snapshot: This state is entered when creating a new static
+ * snapshot. The data from any previous mailbox snapshot is
+ * cleared and a new capture of the mailbox head and tail is
+ * logged. This will be the new static snapshot to detect
+ * asynchronous messages sent by VFs. On capturing the snapshot
+ * and depending on whether the number of pending messages in that
+ * snapshot exceeds the watermark value, the state machine enters
+ * traverse or detect states.
+ * Traverse: if the pending message count is below the watermark, then
+ * iterate through the snapshot without any action on the VF.
+ * Detect: if the pending message count exceeds the watermark, traverse
+ * the static snapshot and look for a malicious VF.
+ */
+int
+ice_mbx_vf_state_handler(struct ice_hw *hw,
+ struct ice_mbx_data *mbx_data, u16 vf_id,
+ bool *is_malvf)
+{
+ struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+ struct ice_mbx_snap_buffer_data *snap_buf;
+ struct ice_ctl_q_info *cq = &hw->mailboxq;
+ enum ice_mbx_snapshot_state new_state;
+ int status = 0;
+
+ if (!is_malvf || !mbx_data)
+ return -EINVAL;
+
+ /* When entering the mailbox state machine assume that the VF
+ * is not malicious until detected.
+ */
+ *is_malvf = false;
+
+	/* Check that the max number of messages allowed to be processed while
+	 * servicing the current interrupt is greater than the defined AVF
+	 * message threshold.
+	 */
+ if (mbx_data->max_num_msgs_mbx <= ICE_ASYNC_VF_MSG_THRESHOLD)
+ return -EINVAL;
+
+	/* The watermark value should not be less than the threshold limit
+	 * set for the number of asynchronous messages a VF can send to the
+	 * mailbox, nor should it be greater than the maximum number of
+	 * messages in the mailbox serviced in the current interrupt.
+ */
+ if (mbx_data->async_watermark_val < ICE_ASYNC_VF_MSG_THRESHOLD ||
+ mbx_data->async_watermark_val > mbx_data->max_num_msgs_mbx)
+ return -EINVAL;
+
+ new_state = ICE_MAL_VF_DETECT_STATE_INVALID;
+ snap_buf = &snap->mbx_buf;
+
+ switch (snap_buf->state) {
+ case ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT:
+ /* Clear any previously held data in mailbox snapshot structure. */
+ ice_mbx_reset_snapshot(snap);
+
+ /* Collect the pending ARQ count, number of messages processed and
+ * the maximum number of messages allowed to be processed from the
+ * Mailbox for current interrupt.
+ */
+ snap_buf->num_pending_arq = mbx_data->num_pending_arq;
+ snap_buf->num_msg_proc = mbx_data->num_msg_proc;
+ snap_buf->max_num_msgs_mbx = mbx_data->max_num_msgs_mbx;
+
+ /* Capture a new static snapshot of the mailbox by logging the
+ * head and tail of snapshot and set num_iterations to the tail
+ * value to mark the start of the iteration through the snapshot.
+ */
+ snap_buf->head = ICE_RQ_DATA_MASK(cq->rq.next_to_clean +
+ mbx_data->num_pending_arq);
+ snap_buf->tail = ICE_RQ_DATA_MASK(cq->rq.next_to_clean - 1);
+ snap_buf->num_iterations = snap_buf->tail;
+
+ /* The number of pending ARQ messages returned by
+ * ice_clean_rq_elem is the difference between the head and
+ * tail of the mailbox queue. Comparing this value against the
+ * watermark helps determine if we potentially have malicious VFs.
+ */
+ if (snap_buf->num_pending_arq >=
+ mbx_data->async_watermark_val) {
+ new_state = ICE_MAL_VF_DETECT_STATE_DETECT;
+ status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf);
+ } else {
+ new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE;
+ ice_mbx_traverse(hw, &new_state);
+ }
+ break;
+
+ case ICE_MAL_VF_DETECT_STATE_TRAVERSE:
+ new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE;
+ ice_mbx_traverse(hw, &new_state);
+ break;
+
+ case ICE_MAL_VF_DETECT_STATE_DETECT:
+ new_state = ICE_MAL_VF_DETECT_STATE_DETECT;
+ status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf);
+ break;
+
+ default:
+ new_state = ICE_MAL_VF_DETECT_STATE_INVALID;
+ status = -EIO;
+ }
+
+ snap_buf->state = new_state;
+
+ return status;
+}
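+
+/* A minimal caller sketch (hypothetical; the field values and the
+ * watermark variable are assumptions, the real call site lives in the
+ * PF's mailbox servicing path):
+ *
+ *   struct ice_mbx_data mbx_data = {
+ *           .num_msg_proc = num_processed,
+ *           .num_pending_arq = pending,
+ *           .max_num_msgs_mbx = hw->mailboxq.num_rq_entries,
+ *           .async_watermark_val = watermark,
+ *   };
+ *   bool is_malvf;
+ *
+ *   if (!ice_mbx_vf_state_handler(hw, &mbx_data, vf_id, &is_malvf) &&
+ *       is_malvf)
+ *           handle_malicious_vf(vf_id);   (hypothetical helper)
+ */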
+
+/**
+ * ice_mbx_report_malvf - Track and note malicious VF
+ * @hw: pointer to the HW struct
+ * @all_malvfs: all malicious VFs tracked by PF
+ * @bitmap_len: length of bitmap in bits
+ * @vf_id: relative virtual function ID of the malicious VF
+ * @report_malvf: boolean to indicate if malicious VF must be reported
+ *
+ * This function will update a bitmap that keeps track of the malicious
+ * VFs attached to the PF. A malicious VF must be reported only once
+ * between VF resets or unloads, so the function checks the input vf_id
+ * against the bitmap to verify whether the VF has already been detected
+ * in a previous mailbox iteration.
+ */
+int
+ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs,
+ u16 bitmap_len, u16 vf_id, bool *report_malvf)
+{
+ if (!all_malvfs || !report_malvf)
+ return -EINVAL;
+
+ *report_malvf = false;
+
+ if (bitmap_len < hw->mbx_snapshot.mbx_vf.vfcntr_len)
+ return -EINVAL;
+
+ if (vf_id >= bitmap_len)
+ return -EIO;
+
+ /* If the VF is not already marked in the bitmap, set its bit and report it */
+ if (!test_and_set_bit(vf_id, all_malvfs))
+ *report_malvf = true;
+
+ return 0;
+}
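+
+/* Report-once semantics: the first call for a given vf_id sets its bit in
+ * all_malvfs and returns *report_malvf == true; subsequent calls for the
+ * same vf_id return false until the bit is cleared, e.g. by
+ * ice_mbx_clear_malvf() on a VF reset.
+ */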
+
+/**
+ * ice_mbx_clear_malvf - Clear VF bitmap and counter for VF ID
+ * @snap: pointer to the mailbox snapshot structure
+ * @all_malvfs: all malicious VFs tracked by PF
+ * @bitmap_len: length of bitmap in bits
+ * @vf_id: relative virtual function ID of the malicious VF
+ *
+ * In case of a VF reset, this function can be called to clear
+ * the bit corresponding to the VF ID in the bitmap tracking all
+ * malicious VFs attached to the PF. The function also clears the
+ * VF counter array at the index of the VF ID. This is to ensure
+ * that the new VF loaded is not considered malicious before going
+ * through the overflow detection algorithm.
+ */
+int
+ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs,
+ u16 bitmap_len, u16 vf_id)
+{
+ if (!snap || !all_malvfs)
+ return -EINVAL;
+
+ if (bitmap_len < snap->mbx_vf.vfcntr_len)
+ return -EINVAL;
+
+ /* Ensure VF ID value is not larger than bitmap or VF counter length */
+ if (vf_id >= bitmap_len || vf_id >= snap->mbx_vf.vfcntr_len)
+ return -EIO;
+
+ /* Clear VF ID bit in the bitmap tracking malicious VFs attached to PF */
+ clear_bit(vf_id, all_malvfs);
+
+ /* Clear the VF counter in the mailbox snapshot structure for that VF ID.
+ * This ensures that if a VF is unloaded and a new one is brought up with
+ * the same VF ID while a snapshot is in the traverse or detect state,
+ * the counter for that VF ID does not increment on top of the existing
+ * values in the mailbox overflow detection algorithm.
+ */
+ snap->mbx_vf.vf_cntr[vf_id] = 0;
+
+ return 0;
+}
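+
+/* Usage sketch (hypothetical caller; the bitmap and length shown are
+ * assumptions about the PF-side bookkeeping):
+ *
+ *   ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs,
+ *                       ICE_MAX_SRIOV_VFS, vf->vf_id);
+ *
+ * called from a VF reset path so the rebuilt VF starts with a clean
+ * counter and a cleared malicious bit.
+ */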
+
+/**
+ * ice_mbx_init_snapshot - Initialize mailbox snapshot structure
+ * @hw: pointer to the hardware structure
+ * @vf_count: number of VFs allocated on a PF
+ *
+ * Clear the mailbox snapshot structure and allocate memory
+ * for the VF counter array based on the number of VFs allocated
+ * on that PF.
+ *
+ * Assumption: This function will assume ice_get_caps() has already been
+ * called to ensure that the vf_count can be compared against the number
+ * of VFs supported as defined in the functional capabilities of the device.
+ */
+int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count)
+{
+ struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+
+ /* Ensure that the number of VFs allocated is non-zero and
+ * is not greater than the number of supported VFs defined in
+ * the functional capabilities of the PF.
+ */
+ if (!vf_count || vf_count > hw->func_caps.num_allocd_vfs)
+ return -EINVAL;
+
+ snap->mbx_vf.vf_cntr = devm_kcalloc(ice_hw_to_dev(hw), vf_count,
+ sizeof(*snap->mbx_vf.vf_cntr),
+ GFP_KERNEL);
+ if (!snap->mbx_vf.vf_cntr)
+ return -ENOMEM;
+
+ /* Set the VF counter length to the number of VFs allocated
+ * per the given PF's functional capabilities.
+ */
+ snap->mbx_vf.vfcntr_len = vf_count;
+
+ /* Clear mbx_buf in the mailbox snapshot structure and set the
+ * mailbox snapshot state to start a new capture.
+ */
+ memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
+ snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+
+ return 0;
+}
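+
+/* Lifecycle sketch: ice_mbx_init_snapshot() is expected to run once when
+ * VFs are allocated, ice_mbx_vf_state_handler() once per received mailbox
+ * message, and ice_mbx_deinit_snapshot() when SR-IOV is torn down; the
+ * exact call sites live in the PF's SR-IOV setup and teardown paths.
+ */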
+
+/**
+ * ice_mbx_deinit_snapshot - Free mailbox snapshot structure
+ * @hw: pointer to the hardware structure
+ *
+ * Clear the mailbox snapshot structure and free the VF counter array.
+ */
+void ice_mbx_deinit_snapshot(struct ice_hw *hw)
+{
+ struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+
+ /* Free VF counter array and reset VF counter length */
+ devm_kfree(ice_hw_to_dev(hw), snap->mbx_vf.vf_cntr);
+ snap->mbx_vf.vfcntr_len = 0;
+
+ /* Clear mbx_buf in the mailbox snapshot structure */
+ memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h
new file mode 100644
index 000000000..582716e6d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_VF_MBX_H_
+#define _ICE_VF_MBX_H_
+
+#include "ice_type.h"
+#include "ice_controlq.h"
+
+/* Define the mailbox message threshold as 63 asynchronous
+ * pending messages. Normal VF functionality does not require
+ * sending more than 63 asynchronous pending messages.
+ */
+#define ICE_ASYNC_VF_MSG_THRESHOLD 63
+
+#ifdef CONFIG_PCI_IOV
+int
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+ u8 *msg, u16 msglen, struct ice_sq_cd *cd);
+
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed);
+int
+ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data,
+ u16 vf_id, bool *is_mal_vf);
+int
+ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs,
+ u16 bitmap_len, u16 vf_id);
+int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count);
+void ice_mbx_deinit_snapshot(struct ice_hw *hw);
+int
+ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs,
+ u16 bitmap_len, u16 vf_id, bool *report_malvf);
+#else /* CONFIG_PCI_IOV */
+static inline int
+ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw,
+ u16 __always_unused vfid, u32 __always_unused v_opcode,
+ u32 __always_unused v_retval, u8 __always_unused *msg,
+ u16 __always_unused msglen,
+ struct ice_sq_cd __always_unused *cd)
+{
+ return 0;
+}
+
+static inline u32
+ice_conv_link_speed_to_virtchnl(bool __always_unused adv_link_support,
+ u16 __always_unused link_speed)
+{
+ return 0;
+}
+
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_VF_MBX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
new file mode 100644
index 000000000..b1ffb8189
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_vsi_vlan_ops.h"
+#include "ice_vsi_vlan_lib.h"
+#include "ice_vlan_mode.h"
+#include "ice.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_sriov.h"
+
+static int
+noop_vlan_arg(struct ice_vsi __always_unused *vsi,
+ struct ice_vlan __always_unused *vlan)
+{
+ return 0;
+}
+
+static int
+noop_vlan(struct ice_vsi __always_unused *vsi)
+{
+ return 0;
+}
+
+/**
+ * ice_vf_vsi_init_vlan_ops - Initialize default VSI VLAN ops for VF VSI
+ * @vsi: VF's VSI being configured
+ *
+ * If Double VLAN Mode (DVM) is enabled, assume that the VF supports the new
+ * VIRTCHNL_VF_VLAN_OFFLOAD_V2 capability and set up the VLAN ops accordingly.
+ * If SVM is enabled maintain the same level of VLAN support previous to
+ * VIRTCHNL_VF_VLAN_OFFLOAD_V2.
+ */
+void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+ struct ice_pf *pf = vsi->back;
+ struct ice_vf *vf = vsi->vf;
+
+ if (WARN_ON(!vf))
+ return;
+
+ if (ice_is_dvm_ena(&pf->hw)) {
+ vlan_ops = &vsi->outer_vlan_ops;
+
+ /* outer VLAN ops regardless of port VLAN config */
+ vlan_ops->add_vlan = ice_vsi_add_vlan;
+ vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering;
+ vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
+
+ if (ice_vf_is_port_vlan_ena(vf)) {
+ /* setup outer VLAN ops */
+ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+ /* all Rx traffic should be in the domain of the
+ * assigned port VLAN, so prevent disabling Rx VLAN
+ * filtering
+ */
+ vlan_ops->dis_rx_filtering = noop_vlan;
+ vlan_ops->ena_rx_filtering =
+ ice_vsi_ena_rx_vlan_filtering;
+
+ /* setup inner VLAN ops */
+ vlan_ops = &vsi->inner_vlan_ops;
+ vlan_ops->add_vlan = noop_vlan_arg;
+ vlan_ops->del_vlan = noop_vlan_arg;
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+ } else {
+ vlan_ops->dis_rx_filtering =
+ ice_vsi_dis_rx_vlan_filtering;
+
+ if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
+ vlan_ops->ena_rx_filtering = noop_vlan;
+ else
+ vlan_ops->ena_rx_filtering =
+ ice_vsi_ena_rx_vlan_filtering;
+
+ vlan_ops->del_vlan = ice_vsi_del_vlan;
+ vlan_ops->ena_stripping = ice_vsi_ena_outer_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_outer_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+
+ /* setup inner VLAN ops */
+ vlan_ops = &vsi->inner_vlan_ops;
+
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+ }
+ } else {
+ vlan_ops = &vsi->inner_vlan_ops;
+
+ /* inner VLAN ops regardless of port VLAN config */
+ vlan_ops->add_vlan = ice_vsi_add_vlan;
+ vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+ vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering;
+ vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
+
+ if (ice_vf_is_port_vlan_ena(vf)) {
+ vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
+ vlan_ops->ena_rx_filtering =
+ ice_vsi_ena_rx_vlan_filtering;
+ /* all Rx traffic should be in the domain of the
+ * assigned port VLAN, so prevent disabling Rx VLAN
+ * filtering
+ */
+ vlan_ops->dis_rx_filtering = noop_vlan;
+ } else {
+ vlan_ops->dis_rx_filtering =
+ ice_vsi_dis_rx_vlan_filtering;
+ if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
+ vlan_ops->ena_rx_filtering = noop_vlan;
+ else
+ vlan_ops->ena_rx_filtering =
+ ice_vsi_ena_rx_vlan_filtering;
+
+ vlan_ops->del_vlan = ice_vsi_del_vlan;
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+ }
+ }
+}
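+
+/* The net effect is an ops-table dispatch: callers never branch on DVM/SVM
+ * or port VLAN state themselves, they invoke the installed op, e.g.
+ *
+ *   struct ice_vlan vlan = ICE_VLAN(ETH_P_8021Q, vid, 0);
+ *
+ *   vsi->outer_vlan_ops.add_vlan(vsi, &vlan);
+ *
+ * which resolves to ice_vsi_add_vlan() or a no-op depending on how this
+ * function wired the table (the ICE_VLAN() usage is illustrative).
+ */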
+
+/**
+ * ice_vf_vsi_cfg_dvm_legacy_vlan_mode - Config VLAN mode for old VFs in DVM
+ * @vsi: VF's VSI being configured
+ *
+ * This should only be called when Double VLAN Mode (DVM) is enabled, there
+ * is not a port VLAN enabled on this VF, and the VF negotiates
+ * VIRTCHNL_VF_OFFLOAD_VLAN.
+ *
+ * This function sets up the VF VSI's inner and outer ice_vsi_vlan_ops and also
+ * initializes software-only VLAN mode (i.e. allow all VLANs). It also installs
+ * no-op implementations for any functions that may be called during the
+ * lifetime of the VF so these methods do nothing and succeed.
+ */
+void ice_vf_vsi_cfg_dvm_legacy_vlan_mode(struct ice_vsi *vsi)
+{
+ struct ice_vsi_vlan_ops *vlan_ops;
+ struct ice_vf *vf = vsi->vf;
+ struct device *dev;
+
+ if (WARN_ON(!vf))
+ return;
+
+ dev = ice_pf_to_dev(vf->pf);
+
+ if (!ice_is_dvm_ena(&vsi->back->hw) || ice_vf_is_port_vlan_ena(vf))
+ return;
+
+ vlan_ops = &vsi->outer_vlan_ops;
+
+ /* Rx VLAN filtering always disabled to allow software offloaded VLANs
+ * for VFs that only support VIRTCHNL_VF_OFFLOAD_VLAN and don't have a
+ * port VLAN configured
+ */
+ vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+ /* Don't fail when attempting to enable Rx VLAN filtering */
+ vlan_ops->ena_rx_filtering = noop_vlan;
+
+ /* Tx VLAN filtering always disabled to allow software offloaded VLANs
+ * for VFs that only support VIRTCHNL_VF_OFFLOAD_VLAN and don't have a
+ * port VLAN configured
+ */
+ vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
+ /* Don't fail when attempting to enable Tx VLAN filtering */
+ vlan_ops->ena_tx_filtering = noop_vlan;
+
+ if (vlan_ops->dis_rx_filtering(vsi))
+ dev_dbg(dev, "Failed to disable Rx VLAN filtering for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+ if (vlan_ops->dis_tx_filtering(vsi))
+ dev_dbg(dev, "Failed to disable Tx VLAN filtering for old VF without VIRTHCNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+ /* All outer VLAN offloads must be disabled */
+ vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+ vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+
+ if (vlan_ops->dis_stripping(vsi))
+ dev_dbg(dev, "Failed to disable outer VLAN stripping for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+ if (vlan_ops->dis_insertion(vsi))
+ dev_dbg(dev, "Failed to disable outer VLAN insertion for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+ /* All inner VLAN offloads must be disabled */
+ vlan_ops = &vsi->inner_vlan_ops;
+
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+
+ if (vlan_ops->dis_stripping(vsi))
+ dev_dbg(dev, "Failed to disable inner VLAN stripping for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+ if (vlan_ops->dis_insertion(vsi))
+ dev_dbg(dev, "Failed to disable inner VLAN insertion for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+}
+
+/**
+ * ice_vf_vsi_cfg_svm_legacy_vlan_mode - Config VLAN mode for old VFs in SVM
+ * @vsi: VF's VSI being configured
+ *
+ * This should only be called when Single VLAN Mode (SVM) is enabled, there is
+ * not a port VLAN enabled on this VF, and the VF negotiates
+ * VIRTCHNL_VF_OFFLOAD_VLAN.
+ *
+ * All of the normal SVM VLAN ops are identical for this case. However,
+ * Rx VLAN filtering should be turned off by default in this case.
+ */
+void ice_vf_vsi_cfg_svm_legacy_vlan_mode(struct ice_vsi *vsi)
+{
+ struct ice_vf *vf = vsi->vf;
+
+ if (WARN_ON(!vf))
+ return;
+
+ if (ice_is_dvm_ena(&vsi->back->hw) || ice_vf_is_port_vlan_ena(vf))
+ return;
+
+ if (vsi->inner_vlan_ops.dis_rx_filtering(vsi))
+ dev_dbg(ice_pf_to_dev(vf->pf), "Failed to disable Rx VLAN filtering for old VF with VIRTCHNL_VF_OFFLOAD_VLAN support\n");
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h
new file mode 100644
index 000000000..875a4e615
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VF_VSI_VLAN_OPS_H_
+#define _ICE_VF_VSI_VLAN_OPS_H_
+
+#include "ice_vsi_vlan_ops.h"
+
+struct ice_vsi;
+
+void ice_vf_vsi_cfg_dvm_legacy_vlan_mode(struct ice_vsi *vsi);
+void ice_vf_vsi_cfg_svm_legacy_vlan_mode(struct ice_vsi *vsi);
+
+#ifdef CONFIG_PCI_IOV
+void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi);
+#else
+static inline void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) { }
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_VF_VSI_VLAN_OPS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
new file mode 100644
index 000000000..2b4c791b6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -0,0 +1,3826 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022, Intel Corporation. */
+
+#include "ice_virtchnl.h"
+#include "ice_vf_lib_private.h"
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_virtchnl_allowlist.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_vlan.h"
+#include "ice_flex_pipe.h"
+#include "ice_dcb_lib.h"
+
+#define FIELD_SELECTOR(proto_hdr_field) \
+ BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK)
+
+struct ice_vc_hdr_match_type {
+ u32 vc_hdr; /* virtchnl headers (VIRTCHNL_PROTO_HDR_XXX) */
+ u32 ice_hdr; /* ice headers (ICE_FLOW_SEG_HDR_XXX) */
+};
+
+static const struct ice_vc_hdr_match_type ice_vc_hdr_list[] = {
+ {VIRTCHNL_PROTO_HDR_NONE, ICE_FLOW_SEG_HDR_NONE},
+ {VIRTCHNL_PROTO_HDR_ETH, ICE_FLOW_SEG_HDR_ETH},
+ {VIRTCHNL_PROTO_HDR_S_VLAN, ICE_FLOW_SEG_HDR_VLAN},
+ {VIRTCHNL_PROTO_HDR_C_VLAN, ICE_FLOW_SEG_HDR_VLAN},
+ {VIRTCHNL_PROTO_HDR_IPV4, ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER},
+ {VIRTCHNL_PROTO_HDR_IPV6, ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER},
+ {VIRTCHNL_PROTO_HDR_TCP, ICE_FLOW_SEG_HDR_TCP},
+ {VIRTCHNL_PROTO_HDR_UDP, ICE_FLOW_SEG_HDR_UDP},
+ {VIRTCHNL_PROTO_HDR_SCTP, ICE_FLOW_SEG_HDR_SCTP},
+ {VIRTCHNL_PROTO_HDR_PPPOE, ICE_FLOW_SEG_HDR_PPPOE},
+ {VIRTCHNL_PROTO_HDR_GTPU_IP, ICE_FLOW_SEG_HDR_GTPU_IP},
+ {VIRTCHNL_PROTO_HDR_GTPU_EH, ICE_FLOW_SEG_HDR_GTPU_EH},
+ {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN,
+ ICE_FLOW_SEG_HDR_GTPU_DWN},
+ {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP,
+ ICE_FLOW_SEG_HDR_GTPU_UP},
+ {VIRTCHNL_PROTO_HDR_L2TPV3, ICE_FLOW_SEG_HDR_L2TPV3},
+ {VIRTCHNL_PROTO_HDR_ESP, ICE_FLOW_SEG_HDR_ESP},
+ {VIRTCHNL_PROTO_HDR_AH, ICE_FLOW_SEG_HDR_AH},
+ {VIRTCHNL_PROTO_HDR_PFCP, ICE_FLOW_SEG_HDR_PFCP_SESSION},
+};
+
+struct ice_vc_hash_field_match_type {
+ u32 vc_hdr; /* virtchnl headers
+ * (VIRTCHNL_PROTO_HDR_XXX)
+ */
+ u32 vc_hash_field; /* virtchnl hash fields selector
+ * FIELD_SELECTOR((VIRTCHNL_PROTO_HDR_ETH_XXX))
+ */
+ u64 ice_hash_field; /* ice hash fields
+ * (BIT_ULL(ICE_FLOW_FIELD_IDX_XXX))
+ */
+};
+
+static const struct
+ice_vc_hash_field_match_type ice_vc_hash_field_list[] = {
+ {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)},
+ {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA)},
+ {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST),
+ ICE_FLOW_HASH_ETH},
+ {VIRTCHNL_PROTO_HDR_ETH,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE)},
+ {VIRTCHNL_PROTO_HDR_S_VLAN,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_S_VLAN_ID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_S_VLAN)},
+ {VIRTCHNL_PROTO_HDR_C_VLAN,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_C_VLAN_ID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_C_VLAN)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
+ ICE_FLOW_HASH_IPV4},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) |
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) |
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+ ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
+ ICE_FLOW_HASH_IPV6},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) |
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) |
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+ ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+ {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+ {VIRTCHNL_PROTO_HDR_TCP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)},
+ {VIRTCHNL_PROTO_HDR_TCP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)},
+ {VIRTCHNL_PROTO_HDR_TCP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
+ ICE_FLOW_HASH_TCP_PORT},
+ {VIRTCHNL_PROTO_HDR_UDP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)},
+ {VIRTCHNL_PROTO_HDR_UDP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)},
+ {VIRTCHNL_PROTO_HDR_UDP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
+ ICE_FLOW_HASH_UDP_PORT},
+ {VIRTCHNL_PROTO_HDR_SCTP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)},
+ {VIRTCHNL_PROTO_HDR_SCTP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)},
+ {VIRTCHNL_PROTO_HDR_SCTP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) |
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
+ ICE_FLOW_HASH_SCTP_PORT},
+ {VIRTCHNL_PROTO_HDR_PPPOE,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)},
+ {VIRTCHNL_PROTO_HDR_GTPU_IP,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPU_IP_TEID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)},
+ {VIRTCHNL_PROTO_HDR_L2TPV3,
+ FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)},
+ {VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ESP_SPI),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)},
+ {VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_AH_SPI),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)},
+ {VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID),
+ BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)},
+};
+
+/**
+ * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF
+ * @pf: pointer to the PF structure
+ * @v_opcode: operation code
+ * @v_retval: return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ */
+static void
+ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode,
+ enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
+{
+ struct ice_hw *hw = &pf->hw;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ mutex_lock(&pf->vfs.table_lock);
+ ice_for_each_vf(pf, bkt, vf) {
+ /* Not all VFs are enabled, so skip the ones that are not */
+ if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
+ !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+ continue;
+
+ /* Ignore return value on purpose - a given VF may fail, but
+ * we need to keep going and send to all of them
+ */
+ ice_aq_send_msg_to_vf(hw, vf->vf_id, v_opcode, v_retval, msg,
+ msglen, NULL);
+ }
+ mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_set_pfe_link - Set the link speed/status of the virtchnl_pf_event
+ * @vf: pointer to the VF structure
+ * @pfe: pointer to the virtchnl_pf_event to set link speed/status for
+ * @ice_link_speed: link speed specified by ICE_AQ_LINK_SPEED_*
+ * @link_up: whether or not to set the link up/down
+ */
+static void
+ice_set_pfe_link(struct ice_vf *vf, struct virtchnl_pf_event *pfe,
+ int ice_link_speed, bool link_up)
+{
+ if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+ pfe->event_data.link_event_adv.link_status = link_up;
+ /* Speed in Mbps */
+ pfe->event_data.link_event_adv.link_speed =
+ ice_conv_link_speed_to_virtchnl(true, ice_link_speed);
+ } else {
+ pfe->event_data.link_event.link_status = link_up;
+ /* Legacy method for virtchnl link speeds */
+ pfe->event_data.link_event.link_speed =
+ (enum virtchnl_link_speed)
+ ice_conv_link_speed_to_virtchnl(false, ice_link_speed);
+ }
+}
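+
+/* Example: for a 25G link, a VF that negotiated
+ * VIRTCHNL_VF_CAP_ADV_LINK_SPEED receives link_event_adv.link_speed ==
+ * 25000 (Mbps), while a legacy VF receives the nearest
+ * enum virtchnl_link_speed value instead.
+ */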
+
+/**
+ * ice_vc_notify_vf_link_state - Inform a VF of link status
+ * @vf: pointer to the VF structure
+ *
+ * send a link status message to a single VF
+ */
+void ice_vc_notify_vf_link_state(struct ice_vf *vf)
+{
+ struct virtchnl_pf_event pfe = { 0 };
+ struct ice_hw *hw = &vf->pf->hw;
+
+ pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
+ pfe.severity = PF_EVENT_SEVERITY_INFO;
+
+ if (ice_is_vf_link_up(vf))
+ ice_set_pfe_link(vf, &pfe,
+ hw->port_info->phy.link_info.link_speed, true);
+ else
+ ice_set_pfe_link(vf, &pfe, ICE_AQ_LINK_SPEED_UNKNOWN, false);
+
+ ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+ VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe,
+ sizeof(pfe), NULL);
+}
+
+/**
+ * ice_vc_notify_link_state - Inform all VFs on a PF of link status
+ * @pf: pointer to the PF structure
+ */
+void ice_vc_notify_link_state(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ mutex_lock(&pf->vfs.table_lock);
+ ice_for_each_vf(pf, bkt, vf)
+ ice_vc_notify_vf_link_state(vf);
+ mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_vc_notify_reset - Send pending reset message to all VFs
+ * @pf: pointer to the PF structure
+ *
+ * indicate a pending reset to all VFs on a given PF
+ */
+void ice_vc_notify_reset(struct ice_pf *pf)
+{
+ struct virtchnl_pf_event pfe;
+
+ if (!ice_has_vfs(pf))
+ return;
+
+ pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
+ pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
+ ice_vc_vf_broadcast(pf, VIRTCHNL_OP_EVENT, VIRTCHNL_STATUS_SUCCESS,
+ (u8 *)&pfe, sizeof(struct virtchnl_pf_event));
+}
+
+/**
+ * ice_vc_send_msg_to_vf - Send message to VF
+ * @vf: pointer to the VF info
+ * @v_opcode: virtual channel opcode
+ * @v_retval: virtual channel return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * send msg to VF
+ */
+int
+ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+ enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
+{
+ struct device *dev;
+ struct ice_pf *pf;
+ int aq_ret;
+
+ pf = vf->pf;
+ dev = ice_pf_to_dev(pf);
+
+ aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
+ msg, msglen, NULL);
+ if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
+ dev_info(dev, "Unable to send the message to VF %d ret %d aq_err %s\n",
+ vf->vf_id, aq_ret,
+ ice_aq_str(pf->hw.mailboxq.sq_last_status));
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_get_ver_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to request the API version used by the PF
+ */
+static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_version_info info = {
+ VIRTCHNL_VERSION_MAJOR, VIRTCHNL_VERSION_MINOR
+ };
+
+ vf->vf_ver = *(struct virtchnl_version_info *)msg;
+ /* VFs running the 1.0 API expect to get 1.0 back or they will cry. */
+ if (VF_IS_V10(&vf->vf_ver))
+ info.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
+
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION,
+ VIRTCHNL_STATUS_SUCCESS, (u8 *)&info,
+ sizeof(struct virtchnl_version_info));
+}
+
+/**
+ * ice_vc_get_max_frame_size - get max frame size allowed for VF
+ * @vf: VF used to determine max frame size
+ *
+ * Max frame size is determined based on the current port's max frame size and
+ * whether a port VLAN is configured on this VF. The VF is not aware whether
+ * it's in a port VLAN, so the PF needs to account for this both in its max
+ * frame size checks and when sending the max frame size to the VF.
+ */
+static u16 ice_vc_get_max_frame_size(struct ice_vf *vf)
+{
+ struct ice_port_info *pi = ice_vf_get_port_info(vf);
+ u16 max_frame_size;
+
+ max_frame_size = pi->phy.link_info.max_frame_size;
+
+ if (ice_vf_is_port_vlan_ena(vf))
+ max_frame_size -= VLAN_HLEN;
+
+ return max_frame_size;
+}
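+
+/* Worked example: if the port reports a max_frame_size of 9728 bytes and
+ * a port VLAN is enabled on the VF, the VF is told 9728 - VLAN_HLEN (4) =
+ * 9724, reserving room for the VLAN tag the VF does not know about.
+ */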
+
+/**
+ * ice_vc_get_vlan_caps
+ * @hw: pointer to the hw
+ * @vf: pointer to the VF info
+ * @vsi: pointer to the VSI
+ * @driver_caps: current driver caps
+ *
+ * Return 0 if there is no VLAN caps supported, or VLAN caps value
+ */
+static u32
+ice_vc_get_vlan_caps(struct ice_hw *hw, struct ice_vf *vf, struct ice_vsi *vsi,
+ u32 driver_caps)
+{
+ if (ice_is_eswitch_mode_switchdev(vf->pf))
+ /* In switchdev setting VLAN from VF isn't supported */
+ return 0;
+
+ if (driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN_V2) {
+ /* VLAN offloads based on current device configuration */
+ return VIRTCHNL_VF_OFFLOAD_VLAN_V2;
+ } else if (driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN) {
+ /* Allow the VF to negotiate VIRTCHNL_VF_OFFLOAD_VLAN explicitly
+ * only for these two conditions, which amount to guest VLAN
+ * filtering and offloads being based on the inner VLAN or the
+ * inner/single VLAN respectively; don't allow the VF to
+ * negotiate VIRTCHNL_VF_OFFLOAD_VLAN in any other case.
+ */
+ if (ice_is_dvm_ena(hw) && ice_vf_is_port_vlan_ena(vf)) {
+ return VIRTCHNL_VF_OFFLOAD_VLAN;
+ } else if (!ice_is_dvm_ena(hw) &&
+ !ice_vf_is_port_vlan_ena(vf)) {
+ /* configure backward compatible support for VFs that
+ * only support VIRTCHNL_VF_OFFLOAD_VLAN, the PF is
+ * configured in SVM, and no port VLAN is configured
+ */
+ ice_vf_vsi_cfg_svm_legacy_vlan_mode(vsi);
+ return VIRTCHNL_VF_OFFLOAD_VLAN;
+ } else if (ice_is_dvm_ena(hw)) {
+ /* configure software offloaded VLAN support when DVM
+ * is enabled, but no port VLAN is enabled
+ */
+ ice_vf_vsi_cfg_dvm_legacy_vlan_mode(vsi);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_get_vf_res_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to request its resources
+ */
+static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vf_resource *vfres = NULL;
+ struct ice_hw *hw = &vf->pf->hw;
+ struct ice_vsi *vsi;
+ int len = 0;
+ int ret;
+
+ if (ice_check_vf_init(vf)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ len = sizeof(struct virtchnl_vf_resource);
+
+ vfres = kzalloc(len, GFP_KERNEL);
+ if (!vfres) {
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ len = 0;
+ goto err;
+ }
+ if (VF_IS_V11(&vf->vf_ver))
+ vf->driver_caps = *(u32 *)msg;
+ else
+ vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 |
+ VIRTCHNL_VF_OFFLOAD_RSS_REG |
+ VIRTCHNL_VF_OFFLOAD_VLAN;
+
+ vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2;
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto err;
+ }
+
+ vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi,
+ vf->driver_caps);
+
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
+ } else {
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
+ else
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
+ }
+
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF;
+
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
+
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP;
+
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
+
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING;
+
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
+
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+
+ if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF;
+
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_USO;
+
+ vfres->num_vsis = 1;
+ /* Tx and Rx queue counts are equal for a VF */
+ vfres->num_queue_pairs = vsi->num_txq;
+ vfres->max_vectors = vf->pf->vfs.num_msix_per;
+ vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE;
+ vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
+ vfres->max_mtu = ice_vc_get_max_frame_size(vf);
+
+ vfres->vsi_res[0].vsi_id = vf->lan_vsi_num;
+ vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
+ vfres->vsi_res[0].num_queue_pairs = vsi->num_txq;
+ ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
+ vf->hw_lan_addr.addr);
+
+ /* match guest capabilities */
+ vf->driver_caps = vfres->vf_cap_flags;
+
+ ice_vc_set_caps_allowlist(vf);
+ ice_vc_set_working_allowlist(vf);
+
+ set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
+
+err:
+ /* send the response back to the VF */
+ ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, v_ret,
+ (u8 *)vfres, len);
+
+ kfree(vfres);
+ return ret;
+}
+
+/**
+ * ice_vc_reset_vf_msg
+ * @vf: pointer to the VF info
+ *
+ * called from the VF to reset itself,
+ * unlike other virtchnl messages, PF driver
+ * doesn't send the response back to the VF
+ */
+static void ice_vc_reset_vf_msg(struct ice_vf *vf)
+{
+ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+ ice_reset_vf(vf, 0);
+}
+
+/**
+ * ice_vc_isvalid_vsi_id
+ * @vf: pointer to the VF info
+ * @vsi_id: VF relative VSI ID
+ *
+ * check for the valid VSI ID
+ */
+bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
+{
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vsi;
+
+ vsi = ice_find_vsi(pf, vsi_id);
+
+ return (vsi && (vsi->vf == vf));
+}
+
+/**
+ * ice_vc_isvalid_q_id
+ * @vf: pointer to the VF info
+ * @vsi_id: VSI ID
+ * @qid: VSI relative queue ID
+ *
+ * check for the valid queue ID
+ */
+static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid)
+{
+ struct ice_vsi *vsi = ice_find_vsi(vf->pf, vsi_id);
+ /* allocated Tx and Rx queues should always be equal for a VF VSI */
+ return (vsi && (qid < vsi->alloc_txq));
+}
+
+/**
+ * ice_vc_isvalid_ring_len
+ * @ring_len: length of ring
+ *
+ * check for a valid ring count: it must be zero, or a multiple of
+ * ICE_REQ_DESC_MULTIPLE within the min/max descriptor range
+ */
+static bool ice_vc_isvalid_ring_len(u16 ring_len)
+{
+ return ring_len == 0 ||
+ (ring_len >= ICE_MIN_NUM_DESC &&
+ ring_len <= ICE_MAX_NUM_DESC &&
+ !(ring_len % ICE_REQ_DESC_MULTIPLE));
+}
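+
+/* Worked example (assuming ICE_MIN_NUM_DESC/ICE_MAX_NUM_DESC of 64/8160
+ * and ICE_REQ_DESC_MULTIPLE of 32): ring_len values 0, 64 and 512 pass,
+ * while 100 fails (not a multiple of 32) and 8192 fails (above the max).
+ */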
+
+/**
+ * ice_vc_validate_pattern
+ * @vf: pointer to the VF info
+ * @proto: virtchnl protocol headers
+ *
+ * validate whether the pattern is supported or not.
+ *
+ * Return: true on success, false on error.
+ */
+bool
+ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto)
+{
+ bool is_ipv4 = false;
+ bool is_ipv6 = false;
+ bool is_udp = false;
+ u16 ptype = -1;
+ int i = 0;
+
+ while (i < proto->count &&
+ proto->proto_hdr[i].type != VIRTCHNL_PROTO_HDR_NONE) {
+ switch (proto->proto_hdr[i].type) {
+ case VIRTCHNL_PROTO_HDR_ETH:
+ ptype = ICE_PTYPE_MAC_PAY;
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV4:
+ ptype = ICE_PTYPE_IPV4_PAY;
+ is_ipv4 = true;
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV6:
+ ptype = ICE_PTYPE_IPV6_PAY;
+ is_ipv6 = true;
+ break;
+ case VIRTCHNL_PROTO_HDR_UDP:
+ if (is_ipv4)
+ ptype = ICE_PTYPE_IPV4_UDP_PAY;
+ else if (is_ipv6)
+ ptype = ICE_PTYPE_IPV6_UDP_PAY;
+ is_udp = true;
+ break;
+ case VIRTCHNL_PROTO_HDR_TCP:
+ if (is_ipv4)
+ ptype = ICE_PTYPE_IPV4_TCP_PAY;
+ else if (is_ipv6)
+ ptype = ICE_PTYPE_IPV6_TCP_PAY;
+ break;
+ case VIRTCHNL_PROTO_HDR_SCTP:
+ if (is_ipv4)
+ ptype = ICE_PTYPE_IPV4_SCTP_PAY;
+ else if (is_ipv6)
+ ptype = ICE_PTYPE_IPV6_SCTP_PAY;
+ break;
+ case VIRTCHNL_PROTO_HDR_GTPU_IP:
+ case VIRTCHNL_PROTO_HDR_GTPU_EH:
+ if (is_ipv4)
+ ptype = ICE_MAC_IPV4_GTPU;
+ else if (is_ipv6)
+ ptype = ICE_MAC_IPV6_GTPU;
+ goto out;
+ case VIRTCHNL_PROTO_HDR_L2TPV3:
+ if (is_ipv4)
+ ptype = ICE_MAC_IPV4_L2TPV3;
+ else if (is_ipv6)
+ ptype = ICE_MAC_IPV6_L2TPV3;
+ goto out;
+ case VIRTCHNL_PROTO_HDR_ESP:
+ if (is_ipv4)
+ ptype = is_udp ? ICE_MAC_IPV4_NAT_T_ESP :
+ ICE_MAC_IPV4_ESP;
+ else if (is_ipv6)
+ ptype = is_udp ? ICE_MAC_IPV6_NAT_T_ESP :
+ ICE_MAC_IPV6_ESP;
+ goto out;
+ case VIRTCHNL_PROTO_HDR_AH:
+ if (is_ipv4)
+ ptype = ICE_MAC_IPV4_AH;
+ else if (is_ipv6)
+ ptype = ICE_MAC_IPV6_AH;
+ goto out;
+ case VIRTCHNL_PROTO_HDR_PFCP:
+ if (is_ipv4)
+ ptype = ICE_MAC_IPV4_PFCP_SESSION;
+ else if (is_ipv6)
+ ptype = ICE_MAC_IPV6_PFCP_SESSION;
+ goto out;
+ default:
+ break;
+ }
+ i++;
+ }
+
+out:
+ return ice_hw_ptype_ena(&vf->pf->hw, ptype);
+}
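+
+/* Example: a proto list of ETH, IPV4, UDP walks the switch down to
+ * ICE_PTYPE_IPV4_UDP_PAY, and the pattern is accepted only if that ptype
+ * is enabled in the active package (ice_hw_ptype_ena()); ESP following
+ * UDP resolves to the NAT-T ESP ptype instead.
+ */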
+
+/**
+ * ice_vc_parse_rss_cfg - parses hash fields and headers from
+ * a specific virtchnl RSS cfg
+ * @hw: pointer to the hardware
+ * @rss_cfg: pointer to the virtchnl RSS cfg
+ * @addl_hdrs: pointer to the protocol header fields (ICE_FLOW_SEG_HDR_*)
+ * to configure
+ * @hash_flds: pointer to the hash bit fields (ICE_FLOW_HASH_*) to configure
+ *
+ * Return true if all the protocol header and hash fields in the RSS cfg could
+ * be parsed, else return false
+ *
+ * This function parses the virtchnl RSS cfg to be the intended
+ * hash fields and the intended header for RSS configuration
+ */
+static bool
+ice_vc_parse_rss_cfg(struct ice_hw *hw, struct virtchnl_rss_cfg *rss_cfg,
+ u32 *addl_hdrs, u64 *hash_flds)
+{
+ const struct ice_vc_hash_field_match_type *hf_list;
+ const struct ice_vc_hdr_match_type *hdr_list;
+ int i, hf_list_len, hdr_list_len;
+
+ hf_list = ice_vc_hash_field_list;
+ hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list);
+ hdr_list = ice_vc_hdr_list;
+ hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list);
+
+ for (i = 0; i < rss_cfg->proto_hdrs.count; i++) {
+ struct virtchnl_proto_hdr *proto_hdr =
+ &rss_cfg->proto_hdrs.proto_hdr[i];
+ bool hdr_found = false;
+ int j;
+
+ /* Find matched ice headers according to virtchnl headers. */
+ for (j = 0; j < hdr_list_len; j++) {
+ struct ice_vc_hdr_match_type hdr_map = hdr_list[j];
+
+ if (proto_hdr->type == hdr_map.vc_hdr) {
+ *addl_hdrs |= hdr_map.ice_hdr;
+ hdr_found = true;
+ }
+ }
+
+ if (!hdr_found)
+ return false;
+
+ /* Find matched ice hash fields according to
+ * virtchnl hash fields.
+ */
+ for (j = 0; j < hf_list_len; j++) {
+ struct ice_vc_hash_field_match_type hf_map = hf_list[j];
+
+ if (proto_hdr->type == hf_map.vc_hdr &&
+ proto_hdr->field_selector == hf_map.vc_hash_field) {
+ *hash_flds |= hf_map.ice_hash_field;
+ break;
+ }
+ }
+ }
+
+ return true;
+}
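+
+/* Example: a virtchnl cfg selecting IPV4 with both the SRC and DST field
+ * selectors set yields *addl_hdrs |= ICE_FLOW_SEG_HDR_IPV4 |
+ * ICE_FLOW_SEG_HDR_IPV_OTHER and *hash_flds |= ICE_FLOW_HASH_IPV4, per
+ * the two match tables at the top of this file.
+ */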
+
+/**
+ * ice_vf_adv_rss_offload_ena - determine if capabilities support advanced
+ * RSS offloads
+ * @caps: VF driver negotiated capabilities
+ *
+ * Return true if VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF capability is set,
+ * else return false
+ */
+static bool ice_vf_adv_rss_offload_ena(u32 caps)
+{
+ return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF);
+}
+
+/**
+ * ice_vc_handle_rss_cfg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the message buffer
+ * @add: add an RSS config if true, otherwise delete an RSS config
+ *
+ * This function adds/deletes an RSS config
+ */
+static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add)
+{
+ u32 v_opcode = add ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG;
+ struct virtchnl_rss_cfg *rss_cfg = (struct virtchnl_rss_cfg *)msg;
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_hw *hw = &vf->pf->hw;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+ dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n",
+ vf->vf_id);
+ v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+ goto error_param;
+ }
+
+ if (!ice_vf_adv_rss_offload_ena(vf->driver_caps)) {
+ dev_dbg(dev, "VF %d attempting to configure RSS, but Advanced RSS offload is not supported\n",
+ vf->vf_id);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (rss_cfg->proto_hdrs.count > VIRTCHNL_MAX_NUM_PROTO_HDRS ||
+ rss_cfg->rss_algorithm < VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC ||
+ rss_cfg->rss_algorithm > VIRTCHNL_RSS_ALG_XOR_SYMMETRIC) {
+ dev_dbg(dev, "VF %d attempting to configure RSS, but RSS configuration is not valid\n",
+ vf->vf_id);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) {
+ struct ice_vsi_ctx *ctx;
+ u8 lut_type, hash_type;
+ int status;
+
+ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
+ hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_XOR :
+ ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ goto error_param;
+ }
+
+ ctx->info.q_opt_rss = ((lut_type <<
+ ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
+ ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
+ (hash_type &
+ ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
+
+ /* Preserve existing queueing option setting */
+ ctx->info.q_opt_rss |= (vsi->info.q_opt_rss &
+ ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M);
+ ctx->info.q_opt_tc = vsi->info.q_opt_tc;
+ ctx->info.q_opt_flags = vsi->info.q_opt_rss;
+
+ ctx->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+
+ status = ice_update_vsi(hw, vsi->idx, ctx, NULL);
+ if (status) {
+ dev_err(dev, "update VSI for RSS failed, err %d aq_err %s\n",
+ status, ice_aq_str(hw->adminq.sq_last_status));
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ } else {
+ vsi->info.q_opt_rss = ctx->info.q_opt_rss;
+ }
+
+ kfree(ctx);
+ } else {
+ u32 addl_hdrs = ICE_FLOW_SEG_HDR_NONE;
+ u64 hash_flds = ICE_HASH_INVALID;
+
+ if (!ice_vc_parse_rss_cfg(hw, rss_cfg, &addl_hdrs,
+ &hash_flds)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (add) {
+ if (ice_add_rss_cfg(hw, vsi->idx, hash_flds,
+ addl_hdrs)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n",
+ vsi->vsi_num, v_ret);
+ }
+ } else {
+ int status;
+
+ status = ice_rem_rss_cfg(hw, vsi->idx, hash_flds,
+ addl_hdrs);
+ /* We just ignore -ENOENT, because if two configurations
+ * share the same profile, removing one of them actually
+ * removes both, since the profile is deleted.
+ */
+ if (status && status != -ENOENT) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%d\n",
+ vf->vf_id, status);
+ }
+ }
+ }
+
+error_param:
+ return ice_vc_send_msg_to_vf(vf, v_opcode, v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_config_rss_key
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS key
+ */
+static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_rss_key *vrk =
+ (struct virtchnl_rss_key *)msg;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (ice_set_rss_key(vsi, vrk->key))
+ v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret,
+ NULL, 0);
+}
+
+/**
+ * ice_vc_config_rss_lut
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS LUT
+ */
+static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (vrl->lut_entries != ICE_VSIQF_HLUT_ARRAY_SIZE) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (ice_set_rss_lut(vsi, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE))
+ v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret,
+ NULL, 0);
+}
+
+/**
+ * ice_vc_cfg_promiscuous_mode_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the VF VSI's promiscuous mode
+ */
+static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ bool rm_promisc, alluni = false, allmulti = false;
+ struct virtchnl_promisc_info *info =
+ (struct virtchnl_promisc_info *)msg;
+ struct ice_vsi_vlan_ops *vlan_ops;
+ int mcast_err = 0, ucast_err = 0;
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vsi;
+ u8 mcast_m, ucast_m;
+ struct device *dev;
+ int ret = 0;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ dev = ice_pf_to_dev(pf);
+ if (!ice_is_vf_trusted(vf)) {
+ dev_err(dev, "Unprivileged VF %d is attempting to configure promiscuous mode\n",
+ vf->vf_id);
+ /* Leave v_ret alone, lie to the VF on purpose. */
+ goto error_param;
+ }
+
+ if (info->flags & FLAG_VF_UNICAST_PROMISC)
+ alluni = true;
+
+ if (info->flags & FLAG_VF_MULTICAST_PROMISC)
+ allmulti = true;
+
+ rm_promisc = !allmulti && !alluni;
+
+ vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ if (rm_promisc)
+ ret = vlan_ops->ena_rx_filtering(vsi);
+ else
+ ret = vlan_ops->dis_rx_filtering(vsi);
+ if (ret) {
+ dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n");
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ ice_vf_get_promisc_masks(vf, vsi, &ucast_m, &mcast_m);
+
+ if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) {
+ if (alluni) {
+ /* in this case we're turning on promiscuous mode */
+ ret = ice_set_dflt_vsi(vsi);
+ } else {
+ /* in this case we're turning off promiscuous mode */
+ if (ice_is_dflt_vsi_in_use(vsi->port_info))
+ ret = ice_clear_dflt_vsi(vsi);
+ }
+
+ /* in this case we're turning on/off only
+ * allmulticast
+ */
+ if (allmulti)
+ mcast_err = ice_vf_set_vsi_promisc(vf, vsi, mcast_m);
+ else
+ mcast_err = ice_vf_clear_vsi_promisc(vf, vsi, mcast_m);
+
+ if (ret) {
+ dev_err(dev, "Turning on/off promiscuous mode for VF %d failed, error: %d\n",
+ vf->vf_id, ret);
+ v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+ goto error_param;
+ }
+ } else {
+ if (alluni)
+ ucast_err = ice_vf_set_vsi_promisc(vf, vsi, ucast_m);
+ else
+ ucast_err = ice_vf_clear_vsi_promisc(vf, vsi, ucast_m);
+
+ if (allmulti)
+ mcast_err = ice_vf_set_vsi_promisc(vf, vsi, mcast_m);
+ else
+ mcast_err = ice_vf_clear_vsi_promisc(vf, vsi, mcast_m);
+
+ if (ucast_err || mcast_err)
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ }
+
+ if (!mcast_err) {
+ if (allmulti &&
+ !test_and_set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+ dev_info(dev, "VF %u successfully set multicast promiscuous mode\n",
+ vf->vf_id);
+ else if (!allmulti &&
+ test_and_clear_bit(ICE_VF_STATE_MC_PROMISC,
+ vf->vf_states))
+ dev_info(dev, "VF %u successfully unset multicast promiscuous mode\n",
+ vf->vf_id);
+ } else {
+ dev_err(dev, "Error while modifying multicast promiscuous mode for VF %u, error: %d\n",
+ vf->vf_id, mcast_err);
+ }
+
+ if (!ucast_err) {
+ if (alluni &&
+ !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states))
+ dev_info(dev, "VF %u successfully set unicast promiscuous mode\n",
+ vf->vf_id);
+ else if (!alluni &&
+ test_and_clear_bit(ICE_VF_STATE_UC_PROMISC,
+ vf->vf_states))
+ dev_info(dev, "VF %u successfully unset unicast promiscuous mode\n",
+ vf->vf_id);
+ } else {
+ dev_err(dev, "Error while modifying unicast promiscuous mode for VF %u, error: %d\n",
+ vf->vf_id, ucast_err);
+ }
+
+error_param:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+ v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_get_stats_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to get VSI stats
+ */
+static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_queue_select *vqs =
+ (struct virtchnl_queue_select *)msg;
+ struct ice_eth_stats stats = { 0 };
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ ice_update_eth_stats(vsi);
+
+ stats = vsi->eth_stats;
+
+error_param:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_STATS, v_ret,
+ (u8 *)&stats, sizeof(stats));
+}
+
+/**
+ * ice_vc_validate_vqs_bitmaps - validate Rx/Tx queue bitmaps from VIRTCHNL
+ * @vqs: virtchnl_queue_select structure containing bitmaps to validate
+ *
+ * Return true on successful validation, else false
+ */
+static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs)
+{
+ if ((!vqs->rx_queues && !vqs->tx_queues) ||
+ vqs->rx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF) ||
+ vqs->tx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF))
+ return false;
+
+ return true;
+}
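+
+/* Example (assuming ICE_MAX_RSS_QS_PER_VF is 16): rx_queues = 0x3
+ * (queues 0-1) is valid, while rx_queues = BIT(16) is out of range and
+ * an all-zero pair of bitmaps is rejected outright.
+ */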
+
+/**
+ * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL
+ * @vsi: VSI of the VF to configure
+ * @q_idx: VF queue index used to determine the queue in the PF's space
+ */
+static void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ u32 pfq = vsi->txq_map[q_idx];
+ u32 reg;
+
+ reg = rd32(hw, QINT_TQCTL(pfq));
+
+ /* MSI-X index 0 in the VF's space is always for the OICR, which means
+ * this is most likely a poll mode VF driver, so don't enable an
+ * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
+ */
+ if (!(reg & QINT_TQCTL_MSIX_INDX_M))
+ return;
+
+ wr32(hw, QINT_TQCTL(pfq), reg | QINT_TQCTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_vf_ena_rxq_interrupt - enable Tx queue interrupt via QINT_RQCTL
+ * @vsi: VSI of the VF to configure
+ * @q_idx: VF queue index used to determine the queue in the PF's space
+ */
+static void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ u32 pfq = vsi->rxq_map[q_idx];
+ u32 reg;
+
+ reg = rd32(hw, QINT_RQCTL(pfq));
+
+ /* MSI-X index 0 in the VF's space is always for the OICR, which means
+ * this is most likely a poll mode VF driver, so don't enable an
+ * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
+ */
+ if (!(reg & QINT_RQCTL_MSIX_INDX_M))
+ return;
+
+ wr32(hw, QINT_RQCTL(pfq), reg | QINT_RQCTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_vc_ena_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to enable all or specific queue(s)
+ */
+static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_queue_select *vqs =
+ (struct virtchnl_queue_select *)msg;
+ struct ice_vsi *vsi;
+ unsigned long q_map;
+ u16 vf_q_id;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_validate_vqs_bitmaps(vqs)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Enable only Rx rings, Tx rings were enabled by the FW when the
+ * Tx queue group list was configured and the context bits were
+ * programmed using ice_vsi_cfg_txqs
+ */
+ q_map = vqs->rx_queues;
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+ if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Skip queue if enabled */
+ if (test_bit(vf_q_id, vf->rxq_ena))
+ continue;
+
+ if (ice_vsi_ctrl_one_rx_ring(vsi, true, vf_q_id, true)) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n",
+ vf_q_id, vsi->vsi_num);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ ice_vf_ena_rxq_interrupt(vsi, vf_q_id);
+ set_bit(vf_q_id, vf->rxq_ena);
+ }
+
+ q_map = vqs->tx_queues;
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+ if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Skip queue if enabled */
+ if (test_bit(vf_q_id, vf->txq_ena))
+ continue;
+
+ ice_vf_ena_txq_interrupt(vsi, vf_q_id);
+ set_bit(vf_q_id, vf->txq_ena);
+ }
+
+ /* Set flag to indicate that queues are enabled */
+ if (v_ret == VIRTCHNL_STATUS_SUCCESS)
+ set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+
+error_param:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret,
+ NULL, 0);
+}
+
+/**
+ * ice_vf_vsi_dis_single_txq - disable a single Tx queue
+ * @vf: VF to disable queue for
+ * @vsi: VSI for the VF
+ * @q_id: VF relative (0-based) queue ID
+ *
+ * Attempt to disable the Tx queue passed in. If the Tx queue was successfully
+ * disabled then clear q_id bit in the enabled queues bitmap and return
+ * success. Otherwise return error.
+ */
+static int
+ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id)
+{
+ struct ice_txq_meta txq_meta = { 0 };
+ struct ice_tx_ring *ring;
+ int err;
+
+ if (!test_bit(q_id, vf->txq_ena))
+ dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
+ q_id, vsi->vsi_num);
+
+ ring = vsi->tx_rings[q_id];
+ if (!ring)
+ return -EINVAL;
+
+ ice_fill_txq_meta(vsi, ring, &txq_meta);
+
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta);
+ if (err) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
+ q_id, vsi->vsi_num);
+ return err;
+ }
+
+ /* Clear enabled queues flag */
+ clear_bit(q_id, vf->txq_ena);
+
+ return 0;
+}
+
+/**
+ * ice_vc_dis_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to disable all or specific queue(s)
+ */
+static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_queue_select *vqs =
+ (struct virtchnl_queue_select *)msg;
+ struct ice_vsi *vsi;
+ unsigned long q_map;
+ u16 vf_q_id;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) &&
+ !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_validate_vqs_bitmaps(vqs)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (vqs->tx_queues) {
+ q_map = vqs->tx_queues;
+
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+ if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+ }
+ }
+
+ q_map = vqs->rx_queues;
+ /* speed up Rx queue disable by batching them if possible */
+ if (q_map &&
+ bitmap_equal(&q_map, vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF)) {
+ if (ice_vsi_stop_all_rx_rings(vsi)) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to stop all Rx rings on VSI %d\n",
+ vsi->vsi_num);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+ } else if (q_map) {
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+ if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Skip queue if not enabled */
+ if (!test_bit(vf_q_id, vf->rxq_ena))
+ continue;
+
+ if (ice_vsi_ctrl_one_rx_ring(vsi, false, vf_q_id,
+ true)) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n",
+ vf_q_id, vsi->vsi_num);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Clear enabled queues flag */
+ clear_bit(vf_q_id, vf->rxq_ena);
+ }
+ }
+
+ /* Clear enabled queues flag */
+ if (v_ret == VIRTCHNL_STATUS_SUCCESS && ice_vf_has_no_qs_ena(vf))
+ clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+
+error_param:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret,
+ NULL, 0);
+}
+
+/**
+ * ice_cfg_interrupt
+ * @vf: pointer to the VF info
+ * @vsi: the VSI being configured
+ * @vector_id: vector ID
+ * @map: vector map for mapping vectors to queues
+ * @q_vector: structure for interrupt vector
+ *
+ * configure the IRQ to queue map
+ */
+static int
+ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id,
+ struct virtchnl_vector_map *map,
+ struct ice_q_vector *q_vector)
+{
+ u16 vsi_q_id, vsi_q_id_idx;
+ unsigned long qmap;
+
+ q_vector->num_ring_rx = 0;
+ q_vector->num_ring_tx = 0;
+
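+ /* map->rxq_map and map->txq_map are bitmaps of VSI-relative queue
+ * IDs: bit i maps queue i to this vector. Each mapped ring records
+ * its ITR index so the queue interrupt is programmed with the
+ * moderation profile the VF requested.
+ */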
+ qmap = map->rxq_map;
+ for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
+ vsi_q_id = vsi_q_id_idx;
+
+ if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id))
+ return VIRTCHNL_STATUS_ERR_PARAM;
+
+ q_vector->num_ring_rx++;
+ q_vector->rx.itr_idx = map->rxitr_idx;
+ vsi->rx_rings[vsi_q_id]->q_vector = q_vector;
+ ice_cfg_rxq_interrupt(vsi, vsi_q_id, vector_id,
+ q_vector->rx.itr_idx);
+ }
+
+ qmap = map->txq_map;
+ for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
+ vsi_q_id = vsi_q_id_idx;
+
+ if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id))
+ return VIRTCHNL_STATUS_ERR_PARAM;
+
+ q_vector->num_ring_tx++;
+ q_vector->tx.itr_idx = map->txitr_idx;
+ vsi->tx_rings[vsi_q_id]->q_vector = q_vector;
+ ice_cfg_txq_interrupt(vsi, vsi_q_id, vector_id,
+ q_vector->tx.itr_idx);
+ }
+
+ return VIRTCHNL_STATUS_SUCCESS;
+}
+
+/**
+ * ice_vc_cfg_irq_map_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the IRQ to queue map
+ */
+static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ u16 num_q_vectors_mapped, vsi_id, vector_id;
+ struct virtchnl_irq_map_info *irqmap_info;
+ struct virtchnl_vector_map *map;
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vsi;
+ int i;
+
+ irqmap_info = (struct virtchnl_irq_map_info *)msg;
+ num_q_vectors_mapped = irqmap_info->num_vectors;
+
+ /* Check to make sure number of VF vectors mapped is not greater than
+ * number of VF vectors originally allocated, and check that
+ * there is actually at least a single VF queue vector mapped
+ */
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+ pf->vfs.num_msix_per < num_q_vectors_mapped ||
+ !num_q_vectors_mapped) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ for (i = 0; i < num_q_vectors_mapped; i++) {
+ struct ice_q_vector *q_vector;
+
+ map = &irqmap_info->vecmap[i];
+
+ vector_id = map->vector_id;
+ vsi_id = map->vsi_id;
+ /* vector_id is always 0-based for each VF, and can never be
+ * larger than or equal to the max allowed interrupts per VF
+ */
+ if (!(vector_id < pf->vfs.num_msix_per) ||
+ !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
+ (!vector_id && (map->rxq_map || map->txq_map))) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* No need to map VF miscellaneous or rogue vector */
+ if (!vector_id)
+ continue;
+
+ /* Subtract the non-queue vector from vector_id passed by the VF
+ * to get the VSI-relative queue vector array index
+ */
+ q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF];
+ if (!q_vector) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* reject the message if any queue index in the map is invalid */
+ v_ret = (enum virtchnl_status_code)
+ ice_cfg_interrupt(vf, vsi, vector_id, map, q_vector);
+ if (v_ret)
+ goto error_param;
+ }
+
+error_param:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret,
+ NULL, 0);
+}
+
+/**
+ * ice_vc_cfg_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the Rx/Tx queues
+ */
+static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_vsi_queue_config_info *qci =
+ (struct virtchnl_vsi_queue_config_info *)msg;
+ struct virtchnl_queue_pair_info *qpi;
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vsi;
+ int i = -1, q_idx;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+ goto error_param;
+
+ if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id))
+ goto error_param;
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi)
+ goto error_param;
+
+ if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||
+ qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
+ dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
+ vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
+ goto error_param;
+ }
+
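+ /* each queue pair must belong to this VSI, pair matching Tx/Rx queue
+ * IDs, leave head writeback disabled, and request ring lengths within
+ * the supported bounds; anything else fails the whole message
+ */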
+ for (i = 0; i < qci->num_queue_pairs; i++) {
+ qpi = &qci->qpair[i];
+ if (qpi->txq.vsi_id != qci->vsi_id ||
+ qpi->rxq.vsi_id != qci->vsi_id ||
+ qpi->rxq.queue_id != qpi->txq.queue_id ||
+ qpi->txq.headwb_enabled ||
+ !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
+ !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
+ !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
+ goto error_param;
+ }
+
+ q_idx = qpi->rxq.queue_id;
+
+ /* make sure selected "q_idx" is in valid range of queues
+ * for selected "vsi"
+ */
+ if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) {
+ goto error_param;
+ }
+
+ /* copy Tx queue info from VF into VSI */
+ if (qpi->txq.ring_len > 0) {
+ vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
+ vsi->tx_rings[i]->count = qpi->txq.ring_len;
+
+ /* Disable any existing queue first */
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx))
+ goto error_param;
+
+ /* Configure a queue with the requested settings */
+ if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n",
+ vf->vf_id, i);
+ goto error_param;
+ }
+ }
+
+ /* copy Rx queue info from VF into VSI */
+ if (qpi->rxq.ring_len > 0) {
+ u16 max_frame_size = ice_vc_get_max_frame_size(vf);
+
+ vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
+ vsi->rx_rings[i]->count = qpi->rxq.ring_len;
+
+ if (qpi->rxq.databuffer_size != 0 &&
+ (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
+ qpi->rxq.databuffer_size < 1024))
+ goto error_param;
+ vsi->rx_buf_len = qpi->rxq.databuffer_size;
+ vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;
+ if (qpi->rxq.max_pkt_size > max_frame_size ||
+ qpi->rxq.max_pkt_size < 64)
+ goto error_param;
+
+ vsi->max_frame = qpi->rxq.max_pkt_size;
+ /* add space for the port VLAN since the VF driver is
+ * not expected to account for it in the MTU
+ * calculation
+ */
+ if (ice_vf_is_port_vlan_ena(vf))
+ vsi->max_frame += VLAN_HLEN;
+
+ if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n",
+ vf->vf_id, i);
+ goto error_param;
+ }
+ }
+ }
+
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ VIRTCHNL_STATUS_SUCCESS, NULL, 0);
+error_param:
+ /* disable whatever we can */
+ for (; i >= 0; i--) {
+ if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true))
+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n",
+ vf->vf_id, i);
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, i))
+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n",
+ vf->vf_id, i);
+ }
+
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ VIRTCHNL_STATUS_ERR_PARAM, NULL, 0);
+}
+
+/**
+ * ice_can_vf_change_mac
+ * @vf: pointer to the VF info
+ *
+ * Return true if the VF is allowed to change its MAC filters, false otherwise
+ */
+static bool ice_can_vf_change_mac(struct ice_vf *vf)
+{
+ /* If the VF MAC address has been set administratively (via the
+ * ndo_set_vf_mac command), then deny permission to the VF to
+ * add/delete unicast MAC addresses, unless the VF is trusted
+ */
+ if (vf->pf_set_mac && !ice_is_vf_trusted(vf))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vc_ether_addr_type - get type of virtchnl_ether_addr
+ * @vc_ether_addr: used to extract the type
+ */
+static u8
+ice_vc_ether_addr_type(struct virtchnl_ether_addr *vc_ether_addr)
+{
+ return (vc_ether_addr->type & VIRTCHNL_ETHER_ADDR_TYPE_MASK);
+}
+
+/**
+ * ice_is_vc_addr_legacy - check if the MAC address is from an older VF
+ * @vc_ether_addr: VIRTCHNL structure that contains MAC and type
+ */
+static bool
+ice_is_vc_addr_legacy(struct virtchnl_ether_addr *vc_ether_addr)
+{
+ u8 type = ice_vc_ether_addr_type(vc_ether_addr);
+
+ return (type == VIRTCHNL_ETHER_ADDR_LEGACY);
+}
+
+/**
+ * ice_is_vc_addr_primary - check if the MAC address is the VF's primary MAC
+ * @vc_ether_addr: VIRTCHNL structure that contains MAC and type
+ *
+ * This function should only be called when the MAC address in
+ * virtchnl_ether_addr is a valid unicast MAC
+ */
+static bool
+ice_is_vc_addr_primary(struct virtchnl_ether_addr __maybe_unused *vc_ether_addr)
+{
+ u8 type = ice_vc_ether_addr_type(vc_ether_addr);
+
+ return (type == VIRTCHNL_ETHER_ADDR_PRIMARY);
+}
+
+/**
+ * ice_vfhw_mac_add - update the VF's cached hardware MAC if allowed
+ * @vf: VF to update
+ * @vc_ether_addr: structure from VIRTCHNL with MAC to add
+ */
+static void
+ice_vfhw_mac_add(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr)
+{
+ u8 *mac_addr = vc_ether_addr->addr;
+
+ if (!is_valid_ether_addr(mac_addr))
+ return;
+
+ /* only allow legacy VF drivers to set the device and hardware MAC if it
+ * is zero and allow new VF drivers to set the hardware MAC if the type
+ * was correctly specified over VIRTCHNL
+ */
+ if ((ice_is_vc_addr_legacy(vc_ether_addr) &&
+ is_zero_ether_addr(vf->hw_lan_addr.addr)) ||
+ ice_is_vc_addr_primary(vc_ether_addr)) {
+ ether_addr_copy(vf->dev_lan_addr.addr, mac_addr);
+ ether_addr_copy(vf->hw_lan_addr.addr, mac_addr);
+ }
+
+ /* hardware and device MACs are already set, but it's possible that the
+ * VF driver sent the VIRTCHNL_OP_ADD_ETH_ADDR message before the
+ * VIRTCHNL_OP_DEL_ETH_ADDR when trying to update its MAC, so save it
+ * away for the legacy VF driver case as it will be updated in the
+ * delete flow for this case
+ */
+ if (ice_is_vc_addr_legacy(vc_ether_addr)) {
+ ether_addr_copy(vf->legacy_last_added_umac.addr,
+ mac_addr);
+ vf->legacy_last_added_umac.time_modified = jiffies;
+ }
+}
+
+/**
+ * ice_vc_add_mac_addr - attempt to add the MAC address passed in
+ * @vf: pointer to the VF info
+ * @vsi: pointer to the VF's VSI
+ * @vc_ether_addr: VIRTCHNL MAC address structure used to add MAC
+ */
+static int
+ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
+ struct virtchnl_ether_addr *vc_ether_addr)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ u8 *mac_addr = vc_ether_addr->addr;
+ int ret;
+
+ /* device MAC already added */
+ if (ether_addr_equal(mac_addr, vf->dev_lan_addr.addr))
+ return 0;
+
+ if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) {
+ dev_err(dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
+ return -EPERM;
+ }
+
+ ret = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
+ if (ret == -EEXIST) {
+ dev_dbg(dev, "MAC %pM already exists for VF %d\n", mac_addr,
+ vf->vf_id);
+ /* don't return since we might need to update
+ * the primary MAC in ice_vfhw_mac_add() below
+ */
+ } else if (ret) {
+ dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %d\n",
+ mac_addr, vf->vf_id, ret);
+ return ret;
+ } else {
+ vf->num_mac++;
+ }
+
+ ice_vfhw_mac_add(vf, vc_ether_addr);
+
+ return ret;
+}
+
+/**
+ * ice_is_legacy_umac_expired - check if last added legacy unicast MAC expired
+ * @last_added_umac: structure used to check expiration
+ */
+static bool ice_is_legacy_umac_expired(struct ice_time_mac *last_added_umac)
+{
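+ /* a legacy VF driver updating its MAC sends the delete shortly after
+ * the add, so the cached MAC is only honored for a short window
+ */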
+#define ICE_LEGACY_VF_MAC_CHANGE_EXPIRE_TIME msecs_to_jiffies(3000)
+ return time_is_before_jiffies(last_added_umac->time_modified +
+ ICE_LEGACY_VF_MAC_CHANGE_EXPIRE_TIME);
+}
+
+/**
+ * ice_update_legacy_cached_mac - update cached hardware MAC for legacy VF
+ * @vf: VF to update
+ * @vc_ether_addr: structure from VIRTCHNL with MAC to check
+ *
+ * only update cached hardware MAC for legacy VF drivers on delete
+ * because we cannot guarantee order/type of MAC from the VF driver
+ */
+static void
+ice_update_legacy_cached_mac(struct ice_vf *vf,
+ struct virtchnl_ether_addr *vc_ether_addr)
+{
+ if (!ice_is_vc_addr_legacy(vc_ether_addr) ||
+ ice_is_legacy_umac_expired(&vf->legacy_last_added_umac))
+ return;
+
+ ether_addr_copy(vf->dev_lan_addr.addr, vf->legacy_last_added_umac.addr);
+ ether_addr_copy(vf->hw_lan_addr.addr, vf->legacy_last_added_umac.addr);
+}
+
+/**
+ * ice_vfhw_mac_del - update the VF's cached hardware MAC if allowed
+ * @vf: VF to update
+ * @vc_ether_addr: structure from VIRTCHNL with MAC to delete
+ */
+static void
+ice_vfhw_mac_del(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr)
+{
+ u8 *mac_addr = vc_ether_addr->addr;
+
+ if (!is_valid_ether_addr(mac_addr) ||
+ !ether_addr_equal(vf->dev_lan_addr.addr, mac_addr))
+ return;
+
+ /* allow the device MAC to be repopulated in the add flow and don't
+ * clear the hardware MAC (i.e. hw_lan_addr.addr) here as that is meant
+ * to be persistent on VM reboot and across driver unload/load, which
+ * won't work if we clear the hardware MAC here
+ */
+ eth_zero_addr(vf->dev_lan_addr.addr);
+
+ ice_update_legacy_cached_mac(vf, vc_ether_addr);
+}
+
+/**
+ * ice_vc_del_mac_addr - attempt to delete the MAC address passed in
+ * @vf: pointer to the VF info
+ * @vsi: pointer to the VF's VSI
+ * @vc_ether_addr: VIRTCHNL MAC address structure used to delete MAC
+ */
+static int
+ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
+ struct virtchnl_ether_addr *vc_ether_addr)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ u8 *mac_addr = vc_ether_addr->addr;
+ int status;
+
+ if (!ice_can_vf_change_mac(vf) &&
+ ether_addr_equal(vf->dev_lan_addr.addr, mac_addr))
+ return 0;
+
+ status = ice_fltr_remove_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
+ if (status == -ENOENT) {
+ dev_err(dev, "MAC %pM does not exist for VF %d\n", mac_addr,
+ vf->vf_id);
+ return -ENOENT;
+ } else if (status) {
+ dev_err(dev, "Failed to delete MAC %pM for VF %d, error %d\n",
+ mac_addr, vf->vf_id, status);
+ return -EIO;
+ }
+
+ ice_vfhw_mac_del(vf, vc_ether_addr);
+
+ vf->num_mac--;
+
+ return 0;
+}
+
+/**
+ * ice_vc_handle_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @set: true if MAC filters are being set, false otherwise
+ *
+ * add guest MAC address filter
+ */
+static int
+ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
+{
+ int (*ice_vc_cfg_mac)
+ (struct ice_vf *vf, struct ice_vsi *vsi,
+ struct virtchnl_ether_addr *virtchnl_ether_addr);
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_ether_addr_list *al =
+ (struct virtchnl_ether_addr_list *)msg;
+ struct ice_pf *pf = vf->pf;
+ enum virtchnl_ops vc_op;
+ struct ice_vsi *vsi;
+ int i;
+
+ if (set) {
+ vc_op = VIRTCHNL_OP_ADD_ETH_ADDR;
+ ice_vc_cfg_mac = ice_vc_add_mac_addr;
+ } else {
+ vc_op = VIRTCHNL_OP_DEL_ETH_ADDR;
+ ice_vc_cfg_mac = ice_vc_del_mac_addr;
+ }
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+ !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto handle_mac_exit;
+ }
+
+ /* If this VF is not privileged, then we can't add more than a
+ * limited number of addresses. Check to make sure that the
+ * additions do not push us over the limit.
+ */
+ if (set && !ice_is_vf_trusted(vf) &&
+ (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) {
+ dev_err(ice_pf_to_dev(pf), "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n",
+ vf->vf_id);
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto handle_mac_exit;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto handle_mac_exit;
+ }
+
+ for (i = 0; i < al->num_elements; i++) {
+ u8 *mac_addr = al->list[i].addr;
+ int result;
+
+ if (is_broadcast_ether_addr(mac_addr) ||
+ is_zero_ether_addr(mac_addr))
+ continue;
+
+ result = ice_vc_cfg_mac(vf, vsi, &al->list[i]);
+ if (result == -EEXIST || result == -ENOENT) {
+ continue;
+ } else if (result) {
+ v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+ goto handle_mac_exit;
+ }
+ }
+
+handle_mac_exit:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, vc_op, v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_add_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * add guest MAC address filter
+ */
+static int ice_vc_add_mac_addr_msg(struct ice_vf *vf, u8 *msg)
+{
+ return ice_vc_handle_mac_addr_msg(vf, msg, true);
+}
+
+/**
+ * ice_vc_del_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * remove guest MAC address filter
+ */
+static int ice_vc_del_mac_addr_msg(struct ice_vf *vf, u8 *msg)
+{
+ return ice_vc_handle_mac_addr_msg(vf, msg, false);
+}
+
+/**
+ * ice_vc_request_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * VFs get a default number of queues but can use this message to request a
+ * different number. If the request is successful, PF will reset the VF and
+ * return 0. If unsuccessful, the PF will respond with a virtchnl message
+ * informing the VF of the number of available queue pairs.
+ */
+static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vf_res_request *vfres =
+ (struct virtchnl_vf_res_request *)msg;
+ u16 req_queues = vfres->num_queue_pairs;
+ struct ice_pf *pf = vf->pf;
+ u16 max_allowed_vf_queues;
+ u16 tx_rx_queue_left;
+ struct device *dev;
+ u16 cur_queues;
+
+ dev = ice_pf_to_dev(pf);
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ cur_queues = vf->num_vf_qs;
+ tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf),
+ ice_get_avail_rxq_count(pf));
+ max_allowed_vf_queues = tx_rx_queue_left + cur_queues;
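+ /* e.g. a VF that currently owns 4 queues on a PF with 12 Tx/Rx queue
+ * pairs still available could be granted up to 16 queues, subject to
+ * the ICE_MAX_RSS_QS_PER_VF cap below (illustrative numbers)
+ */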
+ if (!req_queues) {
+ dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n",
+ vf->vf_id);
+ } else if (req_queues > ICE_MAX_RSS_QS_PER_VF) {
+ dev_err(dev, "VF %d tried to request more than %d queues.\n",
+ vf->vf_id, ICE_MAX_RSS_QS_PER_VF);
+ vfres->num_queue_pairs = ICE_MAX_RSS_QS_PER_VF;
+ } else if (req_queues > cur_queues &&
+ req_queues - cur_queues > tx_rx_queue_left) {
+ dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n",
+ vf->vf_id, req_queues - cur_queues, tx_rx_queue_left);
+ vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues,
+ ICE_MAX_RSS_QS_PER_VF);
+ } else {
+ /* request is successful, then reset VF */
+ vf->num_req_qs = req_queues;
+ ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
+ dev_info(dev, "VF %d granted request of %u queues.\n",
+ vf->vf_id, req_queues);
+ return 0;
+ }
+
+error_param:
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES,
+ v_ret, (u8 *)vfres, sizeof(*vfres));
+}
+
+/**
+ * ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads
+ * @caps: VF driver negotiated capabilities
+ *
+ * Return true if VIRTCHNL_VF_OFFLOAD_VLAN capability is set, else return false
+ */
+static bool ice_vf_vlan_offload_ena(u32 caps)
+{
+ return !!(caps & VIRTCHNL_VF_OFFLOAD_VLAN);
+}
+
+/**
+ * ice_is_vlan_promisc_allowed - check if VLAN promiscuous config is allowed
+ * @vf: VF used to determine if VLAN promiscuous config is allowed
+ */
+static bool ice_is_vlan_promisc_allowed(struct ice_vf *vf)
+{
+ if ((test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
+ test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) &&
+ test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, vf->pf->flags))
+ return true;
+
+ return false;
+}
+
+/**
+ * ice_vf_ena_vlan_promisc - Enable Tx/Rx VLAN promiscuous for the VLAN
+ * @vsi: VF's VSI used to enable VLAN promiscuous mode
+ * @vlan: VLAN used to enable VLAN promiscuous
+ *
+ * This function should only be called if VLAN promiscuous mode is allowed,
+ * which can be determined via ice_is_vlan_promisc_allowed().
+ */
+static int ice_vf_ena_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ u8 promisc_m = ICE_PROMISC_VLAN_TX | ICE_PROMISC_VLAN_RX;
+ int status;
+
+ status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m,
+ vlan->vid);
+ if (status && status != -EEXIST)
+ return status;
+
+ return 0;
+}
+
+/**
+ * ice_vf_dis_vlan_promisc - Disable Tx/Rx VLAN promiscuous for the VLAN
+ * @vsi: VF's VSI used to disable VLAN promiscuous mode for
+ * @vlan: VLAN used to disable VLAN promiscuous
+ *
+ * This function should only be called if VLAN promiscuous mode is allowed,
+ * which can be determined via ice_is_vlan_promisc_allowed().
+ */
+static int ice_vf_dis_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ u8 promisc_m = ICE_PROMISC_VLAN_TX | ICE_PROMISC_VLAN_RX;
+ int status;
+
+ status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m,
+ vlan->vid);
+ if (status && status != -ENOENT)
+ return status;
+
+ return 0;
+}
+
+/**
+ * ice_vf_has_max_vlans - check if VF already has the max allowed VLAN filters
+ * @vf: VF to check against
+ * @vsi: VF's VSI
+ *
+ * If the VF is trusted then the VF is allowed to add as many VLANs as it
+ * wants to, so return false.
+ *
+ * When the VF is untrusted compare the number of non-zero VLANs + 1 to the max
+ * allowed VLANs for an untrusted VF. Return the result of this comparison.
+ */
+static bool ice_vf_has_max_vlans(struct ice_vf *vf, struct ice_vsi *vsi)
+{
+ if (ice_is_vf_trusted(vf))
+ return false;
+
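+ /* VLAN 0 is added by default for every VF (see the add VLAN flow), so
+ * count one implicit filter on top of the non-zero VLANs
+ */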
+#define ICE_VF_ADDED_VLAN_ZERO_FLTRS 1
+ return ((ice_vsi_num_non_zero_vlans(vsi) +
+ ICE_VF_ADDED_VLAN_ZERO_FLTRS) >= ICE_MAX_VLAN_PER_VF);
+}
+
+/**
+ * ice_vc_process_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @add_v: Add VLAN if true, otherwise delete VLAN
+ *
+ * Process virtchnl op to add or remove programmed guest VLAN ID
+ */
+static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_filter_list *vfl =
+ (struct virtchnl_vlan_filter_list *)msg;
+ struct ice_pf *pf = vf->pf;
+ bool vlan_promisc = false;
+ struct ice_vsi *vsi;
+ struct device *dev;
+ int status = 0;
+ int i;
+
+ dev = ice_pf_to_dev(pf);
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ for (i = 0; i < vfl->num_elements; i++) {
+ if (vfl->vlan_id[i] >= VLAN_N_VID) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_err(dev, "invalid VF VLAN id %d\n",
+ vfl->vlan_id[i]);
+ goto error_param;
+ }
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (add_v && ice_vf_has_max_vlans(vf, vsi)) {
+ dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+ vf->vf_id);
+ /* There is no need to let VF know about being not trusted,
+ * so we can just return success message here
+ */
+ goto error_param;
+ }
+
+ /* in DVM a VF can add/delete inner VLAN filters when
+ * VIRTCHNL_VF_OFFLOAD_VLAN is negotiated, so only reject in SVM
+ */
+ if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&pf->hw)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* in DVM VLAN promiscuous is based on the outer VLAN, which would be
+ * the port VLAN if VIRTCHNL_VF_OFFLOAD_VLAN was negotiated, so only
+ * allow vlan_promisc = true in SVM and if no port VLAN is configured
+ */
+ vlan_promisc = ice_is_vlan_promisc_allowed(vf) &&
+ !ice_is_dvm_ena(&pf->hw) &&
+ !ice_vf_is_port_vlan_ena(vf);
+
+ if (add_v) {
+ for (i = 0; i < vfl->num_elements; i++) {
+ u16 vid = vfl->vlan_id[i];
+ struct ice_vlan vlan;
+
+ if (ice_vf_has_max_vlans(vf, vsi)) {
+ dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+ vf->vf_id);
+ /* There is no need to let VF know about being
+ * not trusted, so we can just return success
+ * message here as well.
+ */
+ goto error_param;
+ }
+
+ /* we add VLAN 0 by default for each VF so we can enable
+ * Tx VLAN anti-spoof without triggering MDD events so
+ * we don't need to add it again here
+ */
+ if (!vid)
+ continue;
+
+ vlan = ICE_VLAN(ETH_P_8021Q, vid, 0);
+ status = vsi->inner_vlan_ops.add_vlan(vsi, &vlan);
+ if (status) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Enable VLAN filtering on first non-zero VLAN */
+ if (!vlan_promisc && vid && !ice_is_dvm_ena(&pf->hw)) {
+ if (vf->spoofchk) {
+ status = vsi->inner_vlan_ops.ena_tx_filtering(vsi);
+ if (status) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_err(dev, "Enable VLAN anti-spoofing on VLAN ID: %d failed error-%d\n",
+ vid, status);
+ goto error_param;
+ }
+ }
+ status = vsi->inner_vlan_ops.ena_rx_filtering(vsi);
+ if (status) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n",
+ vid, status);
+ goto error_param;
+ }
+ } else if (vlan_promisc) {
+ status = ice_vf_ena_vlan_promisc(vsi, &vlan);
+ if (status) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n",
+ vid, status);
+ }
+ }
+ }
+ } else {
+ /* For an untrusted VF, the number of VLAN elements passed to
+ * the PF for removal might be greater than the number of VLAN
+ * filters programmed for that VF, so use the actual number of
+ * VLANs added earlier with the add VLAN opcode. This avoids
+ * removing a VLAN that doesn't exist, which would result in
+ * sending an erroneous failure message back to the VF.
+ */
+ int num_vf_vlan;
+
+ num_vf_vlan = vsi->num_vlan;
+ for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) {
+ u16 vid = vfl->vlan_id[i];
+ struct ice_vlan vlan;
+
+ /* we add VLAN 0 by default for each VF so we can enable
+ * Tx VLAN anti-spoof without triggering MDD events so
+ * we don't want a VIRTCHNL request to remove it
+ */
+ if (!vid)
+ continue;
+
+ vlan = ICE_VLAN(ETH_P_8021Q, vid, 0);
+ status = vsi->inner_vlan_ops.del_vlan(vsi, &vlan);
+ if (status) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Disable VLAN filtering when only VLAN 0 is left */
+ if (!ice_vsi_has_non_zero_vlans(vsi)) {
+ vsi->inner_vlan_ops.dis_tx_filtering(vsi);
+ vsi->inner_vlan_ops.dis_rx_filtering(vsi);
+ }
+
+ if (vlan_promisc)
+ ice_vf_dis_vlan_promisc(vsi, &vlan);
+ }
+ }
+
+error_param:
+ /* send the response to the VF */
+ if (add_v)
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, v_ret,
+ NULL, 0);
+ else
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, v_ret,
+ NULL, 0);
+}
+
+/**
+ * ice_vc_add_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Add and program guest VLAN ID
+ */
+static int ice_vc_add_vlan_msg(struct ice_vf *vf, u8 *msg)
+{
+ return ice_vc_process_vlan_msg(vf, msg, true);
+}
+
+/**
+ * ice_vc_remove_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * remove programmed guest VLAN ID
+ */
+static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg)
+{
+ return ice_vc_process_vlan_msg(vf, msg, false);
+}
+
+/**
+ * ice_vc_ena_vlan_stripping
+ * @vf: pointer to the VF info
+ *
+ * Enable VLAN header stripping for a given VF
+ */
+static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q))
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+error_param:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
+ v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_dis_vlan_stripping
+ * @vf: pointer to the VF info
+ *
+ * Disable VLAN header stripping for a given VF
+ */
+static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ if (vsi->inner_vlan_ops.dis_stripping(vsi))
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+error_param:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+ v_ret, NULL, 0);
+}
+
+/**
+ * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization
+ * @vf: VF to enable/disable VLAN stripping for on initialization
+ *
+ * Set the default for VLAN stripping based on whether a port VLAN is configured
+ * and the current VLAN mode of the device.
+ */
+static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
+{
+ struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+ if (!vsi)
+ return -EINVAL;
+
+ /* don't modify stripping if port VLAN is configured in SVM since the
+ * port VLAN is based on the inner/single VLAN in SVM
+ */
+ if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&vsi->back->hw))
+ return 0;
+
+ if (ice_vf_vlan_offload_ena(vf->driver_caps))
+ return vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q);
+ else
+ return vsi->inner_vlan_ops.dis_stripping(vsi);
+}
+
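+/**
+ * ice_vc_get_max_vlan_fltrs - get the maximum VLAN filters a VF may use
+ * @vf: VF to get the maximum number of VLAN filters for
+ *
+ * Trusted VFs may use the full VLAN ID space, while untrusted VFs are
+ * limited to ICE_MAX_VLAN_PER_VF filters.
+ */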
+static u16 ice_vc_get_max_vlan_fltrs(struct ice_vf *vf)
+{
+ if (vf->trusted)
+ return VLAN_N_VID;
+ else
+ return ICE_MAX_VLAN_PER_VF;
+}
+
+/**
+ * ice_vf_outer_vlan_not_allowed - check if outer VLAN can be used
+ * @vf: VF being checked
+ *
+ * When the device is in double VLAN mode, check whether or not the outer VLAN
+ * is allowed.
+ */
+static bool ice_vf_outer_vlan_not_allowed(struct ice_vf *vf)
+{
+ if (ice_vf_is_port_vlan_ena(vf))
+ return true;
+
+ return false;
+}
+
+/**
+ * ice_vc_set_dvm_caps - set VLAN capabilities when the device is in DVM
+ * @vf: VF that capabilities are being set for
+ * @caps: VLAN capabilities to populate
+ *
+ * Determine VLAN capabilities support based on whether a port VLAN is
+ * configured. If a port VLAN is configured then the VF should use the inner
+ * filtering/offload capabilities since the port VLAN is using the outer VLAN
+ * capabilities.
+ */
+static void
+ice_vc_set_dvm_caps(struct ice_vf *vf, struct virtchnl_vlan_caps *caps)
+{
+ struct virtchnl_vlan_supported_caps *supported_caps;
+
+ if (ice_vf_outer_vlan_not_allowed(vf)) {
+ /* until support for inner VLAN filtering is added when a port
+ * VLAN is configured, only support software offloaded inner
+ * VLANs when a port VLAN is configured in DVM
+ */
+ supported_caps = &caps->filtering.filtering_support;
+ supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ supported_caps = &caps->offloads.stripping_support;
+ supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ supported_caps = &caps->offloads.insertion_support;
+ supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+ caps->offloads.ethertype_match =
+ VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION;
+ } else {
+ supported_caps = &caps->filtering.filtering_support;
+ supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+ supported_caps->outer = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ VIRTCHNL_VLAN_ETHERTYPE_9100 |
+ VIRTCHNL_VLAN_ETHERTYPE_AND;
+ caps->filtering.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ VIRTCHNL_VLAN_ETHERTYPE_9100;
+
+ supported_caps = &caps->offloads.stripping_support;
+ supported_caps->inner = VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ VIRTCHNL_VLAN_ETHERTYPE_9100 |
+ VIRTCHNL_VLAN_ETHERTYPE_XOR |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2;
+
+ supported_caps = &caps->offloads.insertion_support;
+ supported_caps->inner = VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ VIRTCHNL_VLAN_ETHERTYPE_9100 |
+ VIRTCHNL_VLAN_ETHERTYPE_XOR |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2;
+
+ caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+
+ caps->offloads.ethertype_match =
+ VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION;
+ }
+
+ caps->filtering.max_filters = ice_vc_get_max_vlan_fltrs(vf);
+}
+
+/**
+ * ice_vc_set_svm_caps - set VLAN capabilities when the device is in SVM
+ * @vf: VF that capabilities are being set for
+ * @caps: VLAN capabilities to populate
+ *
+ * Determine VLAN capabilities support based on whether a port VLAN is
+ * configured. If a port VLAN is configured then the VF does not have any VLAN
+ * filtering or offload capabilities since the port VLAN is using the inner VLAN
+ * capabilities in single VLAN mode (SVM). Otherwise allow the VF to use inner
+ * VLAN filtering and offload capabilities.
+ */
+static void
+ice_vc_set_svm_caps(struct ice_vf *vf, struct virtchnl_vlan_caps *caps)
+{
+ struct virtchnl_vlan_supported_caps *supported_caps;
+
+ if (ice_vf_is_port_vlan_ena(vf)) {
+ supported_caps = &caps->filtering.filtering_support;
+ supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ supported_caps = &caps->offloads.stripping_support;
+ supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ supported_caps = &caps->offloads.insertion_support;
+ supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ caps->offloads.ethertype_init = VIRTCHNL_VLAN_UNSUPPORTED;
+ caps->offloads.ethertype_match = VIRTCHNL_VLAN_UNSUPPORTED;
+ caps->filtering.max_filters = 0;
+ } else {
+ supported_caps = &caps->filtering.filtering_support;
+ supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+ caps->filtering.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+
+ supported_caps = &caps->offloads.stripping_support;
+ supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ supported_caps = &caps->offloads.insertion_support;
+ supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ VIRTCHNL_VLAN_TOGGLE |
+ VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+ supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+ caps->offloads.ethertype_match =
+ VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION;
+ caps->filtering.max_filters = ice_vc_get_max_vlan_fltrs(vf);
+ }
+}
+
+/**
+ * ice_vc_get_offload_vlan_v2_caps - determine VF's VLAN capabilities
+ * @vf: VF to determine VLAN capabilities for
+ *
+ * This will only be called if the VF and PF successfully negotiated
+ * VIRTCHNL_VF_OFFLOAD_VLAN_V2.
+ *
+ * Set VLAN capabilities based on the current VLAN mode and whether a port VLAN
+ * is configured or not.
+ */
+static int ice_vc_get_offload_vlan_v2_caps(struct ice_vf *vf)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_caps *caps = NULL;
+ int err, len = 0;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+ if (!caps) {
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ goto out;
+ }
+ len = sizeof(*caps);
+
+ if (ice_is_dvm_ena(&vf->pf->hw))
+ ice_vc_set_dvm_caps(vf, caps);
+ else
+ ice_vc_set_svm_caps(vf, caps);
+
+ /* store negotiated caps to prevent invalid VF messages */
+ memcpy(&vf->vlan_v2_caps, caps, sizeof(*caps));
+
+out:
+ err = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS,
+ v_ret, (u8 *)caps, len);
+ kfree(caps);
+ return err;
+}
+
+/**
+ * ice_vc_validate_vlan_tpid - validate VLAN TPID
+ * @filtering_caps: negotiated/supported VLAN filtering capabilities
+ * @tpid: VLAN TPID used for validation
+ *
+ * Convert the VLAN TPID to a VIRTCHNL_VLAN_ETHERTYPE_* and then compare against
+ * the negotiated/supported filtering caps to see if the VLAN TPID is valid.
+ */
+static bool ice_vc_validate_vlan_tpid(u16 filtering_caps, u16 tpid)
+{
+ enum virtchnl_vlan_support vlan_ethertype = VIRTCHNL_VLAN_UNSUPPORTED;
+
+ switch (tpid) {
+ case ETH_P_8021Q:
+ vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_8100;
+ break;
+ case ETH_P_8021AD:
+ vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_88A8;
+ break;
+ case ETH_P_QINQ1:
+ vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_9100;
+ break;
+ }
+
+ if (!(filtering_caps & vlan_ethertype))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vc_is_valid_vlan - validate the virtchnl_vlan
+ * @vc_vlan: virtchnl_vlan to validate
+ *
+ * If the VLAN TCI and VLAN TPID are 0, then this filter is invalid, so return
+ * false. Otherwise return true.
+ */
+static bool ice_vc_is_valid_vlan(struct virtchnl_vlan *vc_vlan)
+{
+ if (!vc_vlan->tci || !vc_vlan->tpid)
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vc_validate_vlan_filter_list - validate the filter list from the VF
+ * @vfc: negotiated/supported VLAN filtering capabilities
+ * @vfl: VLAN filter list from VF to validate
+ *
+ * Validate all of the filters in the VLAN filter list from the VF. If any of
+ * the checks fail then return false. Otherwise return true.
+ */
+static bool
+ice_vc_validate_vlan_filter_list(struct virtchnl_vlan_filtering_caps *vfc,
+ struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+ u16 i;
+
+ if (!vfl->num_elements)
+ return false;
+
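+ /* every filter must stay within the negotiated caps: the outer/inner
+ * VLAN presence, TCI mask, priority bits, and TPID are all checked
+ */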
+ for (i = 0; i < vfl->num_elements; i++) {
+ struct virtchnl_vlan_supported_caps *filtering_support =
+ &vfc->filtering_support;
+ struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i];
+ struct virtchnl_vlan *outer = &vlan_fltr->outer;
+ struct virtchnl_vlan *inner = &vlan_fltr->inner;
+
+ if ((ice_vc_is_valid_vlan(outer) &&
+ filtering_support->outer == VIRTCHNL_VLAN_UNSUPPORTED) ||
+ (ice_vc_is_valid_vlan(inner) &&
+ filtering_support->inner == VIRTCHNL_VLAN_UNSUPPORTED))
+ return false;
+
+ if ((outer->tci_mask &&
+ !(filtering_support->outer & VIRTCHNL_VLAN_FILTER_MASK)) ||
+ (inner->tci_mask &&
+ !(filtering_support->inner & VIRTCHNL_VLAN_FILTER_MASK)))
+ return false;
+
+ if (((outer->tci & VLAN_PRIO_MASK) &&
+ !(filtering_support->outer & VIRTCHNL_VLAN_PRIO)) ||
+ ((inner->tci & VLAN_PRIO_MASK) &&
+ !(filtering_support->inner & VIRTCHNL_VLAN_PRIO)))
+ return false;
+
+ if ((ice_vc_is_valid_vlan(outer) &&
+ !ice_vc_validate_vlan_tpid(filtering_support->outer,
+ outer->tpid)) ||
+ (ice_vc_is_valid_vlan(inner) &&
+ !ice_vc_validate_vlan_tpid(filtering_support->inner,
+ inner->tpid)))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * ice_vc_to_vlan - transform from struct virtchnl_vlan to struct ice_vlan
+ * @vc_vlan: struct virtchnl_vlan to transform
+ */
+static struct ice_vlan ice_vc_to_vlan(struct virtchnl_vlan *vc_vlan)
+{
+ struct ice_vlan vlan = { 0 };
+
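+ /* 802.1Q TCI layout: PCP in bits 15:13, DEI in bit 12, VID in bits
+ * 11:0; e.g. a TCI of 0xa005 decodes to prio 5, VID 5
+ */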
+ vlan.prio = (vc_vlan->tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ vlan.vid = vc_vlan->tci & VLAN_VID_MASK;
+ vlan.tpid = vc_vlan->tpid;
+
+ return vlan;
+}
+
+/**
+ * ice_vc_vlan_action - action to perform on the virtchnl_vlan
+ * @vsi: VF's VSI used to perform the action
+ * @vlan_action: function to perform the action with (i.e. add/del)
+ * @vlan: VLAN filter to perform the action with
+ */
+static int
+ice_vc_vlan_action(struct ice_vsi *vsi,
+ int (*vlan_action)(struct ice_vsi *, struct ice_vlan *),
+ struct ice_vlan *vlan)
+{
+ int err;
+
+ err = vlan_action(vsi, vlan);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_vc_del_vlans - delete VLAN(s) from the virtchnl filter list
+ * @vf: VF used to delete the VLAN(s)
+ * @vsi: VF's VSI used to delete the VLAN(s)
+ * @vfl: virtchnl filter list used to delete the filters
+ */
+static int
+ice_vc_del_vlans(struct ice_vf *vf, struct ice_vsi *vsi,
+ struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+ bool vlan_promisc = ice_is_vlan_promisc_allowed(vf);
+ int err;
+ u16 i;
+
+ for (i = 0; i < vfl->num_elements; i++) {
+ struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i];
+ struct virtchnl_vlan *vc_vlan;
+
+ vc_vlan = &vlan_fltr->outer;
+ if (ice_vc_is_valid_vlan(vc_vlan)) {
+ struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+ err = ice_vc_vlan_action(vsi,
+ vsi->outer_vlan_ops.del_vlan,
+ &vlan);
+ if (err)
+ return err;
+
+ if (vlan_promisc)
+ ice_vf_dis_vlan_promisc(vsi, &vlan);
+
+ /* Disable VLAN filtering when only VLAN 0 is left */
+ if (!ice_vsi_has_non_zero_vlans(vsi) && ice_is_dvm_ena(&vsi->back->hw)) {
+ err = vsi->outer_vlan_ops.dis_tx_filtering(vsi);
+ if (err)
+ return err;
+ }
+ }
+
+ vc_vlan = &vlan_fltr->inner;
+ if (ice_vc_is_valid_vlan(vc_vlan)) {
+ struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+ err = ice_vc_vlan_action(vsi,
+ vsi->inner_vlan_ops.del_vlan,
+ &vlan);
+ if (err)
+ return err;
+
+ /* no support for VLAN promiscuous on inner VLAN unless
+ * we are in Single VLAN Mode (SVM)
+ */
+ if (!ice_is_dvm_ena(&vsi->back->hw)) {
+ if (vlan_promisc)
+ ice_vf_dis_vlan_promisc(vsi, &vlan);
+
+ /* Disable VLAN filtering when only VLAN 0 is left */
+ if (!ice_vsi_has_non_zero_vlans(vsi)) {
+ err = vsi->inner_vlan_ops.dis_tx_filtering(vsi);
+ if (err)
+ return err;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_remove_vlan_v2_msg - virtchnl handler for VIRTCHNL_OP_DEL_VLAN_V2
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ */
+static int ice_vc_remove_vlan_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_vlan_filter_list_v2 *vfl =
+ (struct virtchnl_vlan_filter_list_v2 *)msg;
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct ice_vsi *vsi;
+
+ if (!ice_vc_validate_vlan_filter_list(&vf->vlan_v2_caps.filtering,
+ vfl)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vfl->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (ice_vc_del_vlans(vf, vsi, vfl))
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN_V2, v_ret, NULL,
+ 0);
+}
+
+/**
+ * ice_vc_add_vlans - add VLAN(s) from the virtchnl filter list
+ * @vf: VF used to add the VLAN(s)
+ * @vsi: VF's VSI used to add the VLAN(s)
+ * @vfl: virtchnl filter list used to add the filters
+ */
+static int
+ice_vc_add_vlans(struct ice_vf *vf, struct ice_vsi *vsi,
+ struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+ bool vlan_promisc = ice_is_vlan_promisc_allowed(vf);
+ int err;
+ u16 i;
+
+ for (i = 0; i < vfl->num_elements; i++) {
+ struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i];
+ struct virtchnl_vlan *vc_vlan;
+
+ vc_vlan = &vlan_fltr->outer;
+ if (ice_vc_is_valid_vlan(vc_vlan)) {
+ struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+ err = ice_vc_vlan_action(vsi,
+ vsi->outer_vlan_ops.add_vlan,
+ &vlan);
+ if (err)
+ return err;
+
+ if (vlan_promisc) {
+ err = ice_vf_ena_vlan_promisc(vsi, &vlan);
+ if (err)
+ return err;
+ }
+
+ /* Enable VLAN filtering on first non-zero VLAN */
+ if (vf->spoofchk && vlan.vid && ice_is_dvm_ena(&vsi->back->hw)) {
+ err = vsi->outer_vlan_ops.ena_tx_filtering(vsi);
+ if (err)
+ return err;
+ }
+ }
+
+ vc_vlan = &vlan_fltr->inner;
+ if (ice_vc_is_valid_vlan(vc_vlan)) {
+ struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+ err = ice_vc_vlan_action(vsi,
+ vsi->inner_vlan_ops.add_vlan,
+ &vlan);
+ if (err)
+ return err;
+
+ /* no support for VLAN promiscuous on inner VLAN unless
+ * we are in Single VLAN Mode (SVM)
+ */
+ if (!ice_is_dvm_ena(&vsi->back->hw)) {
+ if (vlan_promisc) {
+ err = ice_vf_ena_vlan_promisc(vsi, &vlan);
+ if (err)
+ return err;
+ }
+
+ /* Enable VLAN filtering on first non-zero VLAN */
+ if (vf->spoofchk && vlan.vid) {
+ err = vsi->inner_vlan_ops.ena_tx_filtering(vsi);
+ if (err)
+ return err;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_validate_add_vlan_filter_list - validate add filter list from the VF
+ * @vsi: VF VSI used to get number of existing VLAN filters
+ * @vfc: negotiated/supported VLAN filtering capabilities
+ * @vfl: VLAN filter list from VF to validate
+ *
+ * Validate all of the filters in the VLAN filter list from the VF during the
+ * VIRTCHNL_OP_ADD_VLAN_V2 opcode. If any of the checks fail then return false.
+ * Otherwise return true.
+ */
+static bool
+ice_vc_validate_add_vlan_filter_list(struct ice_vsi *vsi,
+ struct virtchnl_vlan_filtering_caps *vfc,
+ struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+ u16 num_requested_filters = ice_vsi_num_non_zero_vlans(vsi) +
+ vfl->num_elements;
+
+ if (num_requested_filters > vfc->max_filters)
+ return false;
+
+ return ice_vc_validate_vlan_filter_list(vfc, vfl);
+}
+
+/**
+ * ice_vc_add_vlan_v2_msg - virtchnl handler for VIRTCHNL_OP_ADD_VLAN_V2
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ */
+static int ice_vc_add_vlan_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_filter_list_v2 *vfl =
+ (struct virtchnl_vlan_filter_list_v2 *)msg;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, vfl->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_validate_add_vlan_filter_list(vsi,
+ &vf->vlan_v2_caps.filtering,
+ vfl)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (ice_vc_add_vlans(vf, vsi, vfl))
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN_V2, v_ret, NULL,
+ 0);
+}
+
+/**
+ * ice_vc_valid_vlan_setting - validate VLAN setting
+ * @negotiated_settings: negotiated VLAN settings during VF init
+ * @ethertype_setting: ethertype(s) requested for the VLAN setting
+ */
+static bool
+ice_vc_valid_vlan_setting(u32 negotiated_settings, u32 ethertype_setting)
+{
+ if (ethertype_setting && !(negotiated_settings & ethertype_setting))
+ return false;
+
+ /* only allow a single VIRTCHNL_VLAN_ETHERTYPE if
+ * VIRTCHNL_VLAN_ETHERTYPE_AND is not negotiated/supported
+ */
+ if (!(negotiated_settings & VIRTCHNL_VLAN_ETHERTYPE_AND) &&
+ hweight32(ethertype_setting) > 1)
+ return false;
+
+ /* ability to modify the VLAN setting was not negotiated */
+ if (!(negotiated_settings & VIRTCHNL_VLAN_TOGGLE))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vc_valid_vlan_setting_msg - validate the VLAN setting message
+ * @caps: negotiated VLAN settings during VF init
+ * @msg: message to validate
+ *
+ * Used to validate any VLAN virtchnl message sent as a
+ * virtchnl_vlan_setting structure. Validates the message against the
+ * negotiated/supported caps during VF driver init.
+ */
+static bool
+ice_vc_valid_vlan_setting_msg(struct virtchnl_vlan_supported_caps *caps,
+ struct virtchnl_vlan_setting *msg)
+{
+ if ((!msg->outer_ethertype_setting &&
+ !msg->inner_ethertype_setting) ||
+ (!caps->outer && !caps->inner))
+ return false;
+
+ if (msg->outer_ethertype_setting &&
+ !ice_vc_valid_vlan_setting(caps->outer,
+ msg->outer_ethertype_setting))
+ return false;
+
+ if (msg->inner_ethertype_setting &&
+ !ice_vc_valid_vlan_setting(caps->inner,
+ msg->inner_ethertype_setting))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vc_get_tpid - transform from VIRTCHNL_VLAN_ETHERTYPE_* to VLAN TPID
+ * @ethertype_setting: VIRTCHNL_VLAN_ETHERTYPE_* used to get VLAN TPID
+ * @tpid: VLAN TPID to populate
+ */
+static int ice_vc_get_tpid(u32 ethertype_setting, u16 *tpid)
+{
+ switch (ethertype_setting) {
+ case VIRTCHNL_VLAN_ETHERTYPE_8100:
+ *tpid = ETH_P_8021Q;
+ break;
+ case VIRTCHNL_VLAN_ETHERTYPE_88A8:
+ *tpid = ETH_P_8021AD;
+ break;
+ case VIRTCHNL_VLAN_ETHERTYPE_9100:
+ *tpid = ETH_P_QINQ1;
+ break;
+ default:
+ *tpid = 0;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_ena_vlan_offload - enable VLAN offload based on the ethertype_setting
+ * @vsi: VF's VSI used to enable the VLAN offload
+ * @ena_offload: function used to enable the VLAN offload
+ * @ethertype_setting: VIRTCHNL_VLAN_ETHERTYPE_* to enable offloads for
+ */
+static int
+ice_vc_ena_vlan_offload(struct ice_vsi *vsi,
+ int (*ena_offload)(struct ice_vsi *vsi, u16 tpid),
+ u32 ethertype_setting)
+{
+ u16 tpid;
+ int err;
+
+ err = ice_vc_get_tpid(ethertype_setting, &tpid);
+ if (err)
+ return err;
+
+ err = ena_offload(vsi, tpid);
+ if (err)
+ return err;
+
+ return 0;
+}
+
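+/* l2tsel selects which Rx descriptor field the first stripped VLAN tag is
+ * written to (L2TAG1 or the second L2TAG2 word); it lives in bit 23 of Rx
+ * queue context word 3
+ */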
+#define ICE_L2TSEL_QRX_CONTEXT_REG_IDX 3
+#define ICE_L2TSEL_BIT_OFFSET 23
+enum ice_l2tsel {
+ ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND,
+ ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG1,
+};
+
+/**
+ * ice_vsi_update_l2tsel - update l2tsel field for all Rx rings on this VSI
+ * @vsi: VSI used to update l2tsel on
+ * @l2tsel: l2tsel setting requested
+ *
+ * Use the l2tsel setting to update all of the Rx queue context bits for l2tsel.
+ * This will modify which descriptor field the first offloaded VLAN will be
+ * stripped into.
+ */
+static void ice_vsi_update_l2tsel(struct ice_vsi *vsi, enum ice_l2tsel l2tsel)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ u32 l2tsel_bit;
+ int i;
+
+ if (l2tsel == ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND)
+ l2tsel_bit = 0;
+ else
+ l2tsel_bit = BIT(ICE_L2TSEL_BIT_OFFSET);
+
+ for (i = 0; i < vsi->alloc_rxq; i++) {
+ u16 pfq = vsi->rxq_map[i];
+ u32 qrx_context_offset;
+ u32 regval;
+
+ qrx_context_offset =
+ QRX_CONTEXT(ICE_L2TSEL_QRX_CONTEXT_REG_IDX, pfq);
+
+ regval = rd32(hw, qrx_context_offset);
+ regval &= ~BIT(ICE_L2TSEL_BIT_OFFSET);
+ regval |= l2tsel_bit;
+ wr32(hw, qrx_context_offset, regval);
+ }
+}
+
+/**
+ * ice_vc_ena_vlan_stripping_v2_msg
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virtchnl handler for VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2
+ */
+static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_supported_caps *stripping_support;
+ struct virtchnl_vlan_setting *strip_msg =
+ (struct virtchnl_vlan_setting *)msg;
+ u32 ethertype_setting;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, strip_msg->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ stripping_support = &vf->vlan_v2_caps.offloads.stripping_support;
+ if (!ice_vc_valid_vlan_setting_msg(stripping_support, strip_msg)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = strip_msg->outer_ethertype_setting;
+ if (ethertype_setting) {
+ if (ice_vc_ena_vlan_offload(vsi,
+ vsi->outer_vlan_ops.ena_stripping,
+ ethertype_setting)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ } else {
+ enum ice_l2tsel l2tsel =
+ ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND;
+
+ /* PF tells the VF that the outer VLAN tag is always
+ * extracted to VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 and
+ * inner is always extracted to
+ * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1. This is needed to
+ * support outer stripping so the first tag always ends
+ * up in L2TAG2_2ND and the second/inner tag, if
+ * enabled, is extracted in L2TAG1.
+ */
+ ice_vsi_update_l2tsel(vsi, l2tsel);
+ }
+ }
+
+ ethertype_setting = strip_msg->inner_ethertype_setting;
+ if (ethertype_setting &&
+ ice_vc_ena_vlan_offload(vsi, vsi->inner_vlan_ops.ena_stripping,
+ ethertype_setting)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2,
+ v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_dis_vlan_stripping_v2_msg
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virtchnl handler for VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2
+ */
+static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_supported_caps *stripping_support;
+ struct virtchnl_vlan_setting *strip_msg =
+ (struct virtchnl_vlan_setting *)msg;
+ u32 ethertype_setting;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, strip_msg->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ stripping_support = &vf->vlan_v2_caps.offloads.stripping_support;
+ if (!ice_vc_valid_vlan_setting_msg(stripping_support, strip_msg)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = strip_msg->outer_ethertype_setting;
+ if (ethertype_setting) {
+ if (vsi->outer_vlan_ops.dis_stripping(vsi)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ } else {
+ enum ice_l2tsel l2tsel =
+ ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG1;
+
+ /* PF tells the VF that the outer VLAN tag is always
+ * extracted to VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 and
+ * inner is always extracted to
+ * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1. This is needed to
+ * support inner stripping while outer stripping is
+ * disabled so that the first and only tag is extracted
+ * in L2TAG1.
+ */
+ ice_vsi_update_l2tsel(vsi, l2tsel);
+ }
+ }
+
+ ethertype_setting = strip_msg->inner_ethertype_setting;
+ if (ethertype_setting && vsi->inner_vlan_ops.dis_stripping(vsi)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2,
+ v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_ena_vlan_insertion_v2_msg - enable VLAN insertion (v2)
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virtchnl handler for VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2
+ */
+static int ice_vc_ena_vlan_insertion_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_supported_caps *insertion_support;
+ struct virtchnl_vlan_setting *insertion_msg =
+ (struct virtchnl_vlan_setting *)msg;
+ u32 ethertype_setting;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, insertion_msg->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ insertion_support = &vf->vlan_v2_caps.offloads.insertion_support;
+ if (!ice_vc_valid_vlan_setting_msg(insertion_support, insertion_msg)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = insertion_msg->outer_ethertype_setting;
+ if (ethertype_setting &&
+ ice_vc_ena_vlan_offload(vsi, vsi->outer_vlan_ops.ena_insertion,
+ ethertype_setting)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = insertion_msg->inner_ethertype_setting;
+ if (ethertype_setting &&
+ ice_vc_ena_vlan_offload(vsi, vsi->inner_vlan_ops.ena_insertion,
+ ethertype_setting)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2,
+ v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_dis_vlan_insertion_v2_msg - disable VLAN insertion (v2)
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virtchnl handler for VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2
+ */
+static int ice_vc_dis_vlan_insertion_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_vlan_supported_caps *insertion_support;
+ struct virtchnl_vlan_setting *insertion_msg =
+ (struct virtchnl_vlan_setting *)msg;
+ u32 ethertype_setting;
+ struct ice_vsi *vsi;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ if (!ice_vc_isvalid_vsi_id(vf, insertion_msg->vport_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ insertion_support = &vf->vlan_v2_caps.offloads.insertion_support;
+ if (!ice_vc_valid_vlan_setting_msg(insertion_support, insertion_msg)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = insertion_msg->outer_ethertype_setting;
+ if (ethertype_setting && vsi->outer_vlan_ops.dis_insertion(vsi)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+ ethertype_setting = insertion_msg->inner_ethertype_setting;
+ if (ethertype_setting && vsi->inner_vlan_ops.dis_insertion(vsi)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto out;
+ }
+
+out:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2,
+ v_ret, NULL, 0);
+}
+
+static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = {
+ .get_ver_msg = ice_vc_get_ver_msg,
+ .get_vf_res_msg = ice_vc_get_vf_res_msg,
+ .reset_vf = ice_vc_reset_vf_msg,
+ .add_mac_addr_msg = ice_vc_add_mac_addr_msg,
+ .del_mac_addr_msg = ice_vc_del_mac_addr_msg,
+ .cfg_qs_msg = ice_vc_cfg_qs_msg,
+ .ena_qs_msg = ice_vc_ena_qs_msg,
+ .dis_qs_msg = ice_vc_dis_qs_msg,
+ .request_qs_msg = ice_vc_request_qs_msg,
+ .cfg_irq_map_msg = ice_vc_cfg_irq_map_msg,
+ .config_rss_key = ice_vc_config_rss_key,
+ .config_rss_lut = ice_vc_config_rss_lut,
+ .get_stats_msg = ice_vc_get_stats_msg,
+ .cfg_promiscuous_mode_msg = ice_vc_cfg_promiscuous_mode_msg,
+ .add_vlan_msg = ice_vc_add_vlan_msg,
+ .remove_vlan_msg = ice_vc_remove_vlan_msg,
+ .ena_vlan_stripping = ice_vc_ena_vlan_stripping,
+ .dis_vlan_stripping = ice_vc_dis_vlan_stripping,
+ .handle_rss_cfg_msg = ice_vc_handle_rss_cfg,
+ .add_fdir_fltr_msg = ice_vc_add_fdir_fltr,
+ .del_fdir_fltr_msg = ice_vc_del_fdir_fltr,
+ .get_offload_vlan_v2_caps = ice_vc_get_offload_vlan_v2_caps,
+ .add_vlan_v2_msg = ice_vc_add_vlan_v2_msg,
+ .remove_vlan_v2_msg = ice_vc_remove_vlan_v2_msg,
+ .ena_vlan_stripping_v2_msg = ice_vc_ena_vlan_stripping_v2_msg,
+ .dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg,
+ .ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg,
+ .dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg,
+};
+
+/**
+ * ice_virtchnl_set_dflt_ops - Switch to default virtchnl ops
+ * @vf: the VF to switch ops
+ */
+void ice_virtchnl_set_dflt_ops(struct ice_vf *vf)
+{
+ vf->virtchnl_ops = &ice_virtchnl_dflt_ops;
+}
+
+/**
+ * ice_vc_repr_add_mac
+ * @vf: pointer to VF
+ * @msg: virtchannel message
+ *
+ * When port representors are created, we do not add a MAC rule to
+ * firmware; instead we store the address so that the PF can report
+ * the same MAC as the VF.
+ */
+static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg)
+{
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+ struct virtchnl_ether_addr_list *al =
+ (struct virtchnl_ether_addr_list *)msg;
+ struct ice_vsi *vsi;
+ struct ice_pf *pf;
+ int i;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+ !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto handle_mac_exit;
+ }
+
+ pf = vf->pf;
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto handle_mac_exit;
+ }
+
+ for (i = 0; i < al->num_elements; i++) {
+ u8 *mac_addr = al->list[i].addr;
+ int result;
+
+ if (!is_unicast_ether_addr(mac_addr) ||
+ ether_addr_equal(mac_addr, vf->hw_lan_addr.addr))
+ continue;
+
+ if (vf->pf_set_mac) {
+ dev_err(ice_pf_to_dev(pf), "VF attempting to override administratively set MAC address\n");
+ v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+ goto handle_mac_exit;
+ }
+
+ result = ice_eswitch_add_vf_mac_rule(pf, vf, mac_addr);
+ if (result) {
+ dev_err(ice_pf_to_dev(pf), "Failed to add MAC %pM for VF %d\n, error %d\n",
+ mac_addr, vf->vf_id, result);
+ goto handle_mac_exit;
+ }
+
+ ice_vfhw_mac_add(vf, &al->list[i]);
+ vf->num_mac++;
+ break;
+ }
+
+handle_mac_exit:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR,
+ v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_repr_del_mac - respond with success to a MAC delete request
+ * @vf: pointer to VF
+ * @msg: virtchannel message
+ *
+ * Respond with success so the normal VF flow is not broken.
+ * For legacy VF drivers, try to update the cached MAC address.
+ */
+static int ice_vc_repr_del_mac(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_ether_addr_list *al =
+ (struct virtchnl_ether_addr_list *)msg;
+
+ ice_update_legacy_cached_mac(vf, &al->list[0]);
+
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR,
+ VIRTCHNL_STATUS_SUCCESS, NULL, 0);
+}
+
+static int
+ice_vc_repr_cfg_promiscuous_mode(struct ice_vf *vf, u8 __always_unused *msg)
+{
+ dev_dbg(ice_pf_to_dev(vf->pf),
+ "Can't config promiscuous mode in switchdev mode for VF %d\n",
+ vf->vf_id);
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+ VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
+ NULL, 0);
+}
+
+static const struct ice_virtchnl_ops ice_virtchnl_repr_ops = {
+ .get_ver_msg = ice_vc_get_ver_msg,
+ .get_vf_res_msg = ice_vc_get_vf_res_msg,
+ .reset_vf = ice_vc_reset_vf_msg,
+ .add_mac_addr_msg = ice_vc_repr_add_mac,
+ .del_mac_addr_msg = ice_vc_repr_del_mac,
+ .cfg_qs_msg = ice_vc_cfg_qs_msg,
+ .ena_qs_msg = ice_vc_ena_qs_msg,
+ .dis_qs_msg = ice_vc_dis_qs_msg,
+ .request_qs_msg = ice_vc_request_qs_msg,
+ .cfg_irq_map_msg = ice_vc_cfg_irq_map_msg,
+ .config_rss_key = ice_vc_config_rss_key,
+ .config_rss_lut = ice_vc_config_rss_lut,
+ .get_stats_msg = ice_vc_get_stats_msg,
+ .cfg_promiscuous_mode_msg = ice_vc_repr_cfg_promiscuous_mode,
+ .add_vlan_msg = ice_vc_add_vlan_msg,
+ .remove_vlan_msg = ice_vc_remove_vlan_msg,
+ .ena_vlan_stripping = ice_vc_ena_vlan_stripping,
+ .dis_vlan_stripping = ice_vc_dis_vlan_stripping,
+ .handle_rss_cfg_msg = ice_vc_handle_rss_cfg,
+ .add_fdir_fltr_msg = ice_vc_add_fdir_fltr,
+ .del_fdir_fltr_msg = ice_vc_del_fdir_fltr,
+ .get_offload_vlan_v2_caps = ice_vc_get_offload_vlan_v2_caps,
+ .add_vlan_v2_msg = ice_vc_add_vlan_v2_msg,
+ .remove_vlan_v2_msg = ice_vc_remove_vlan_v2_msg,
+ .ena_vlan_stripping_v2_msg = ice_vc_ena_vlan_stripping_v2_msg,
+ .dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg,
+ .ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg,
+ .dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg,
+};
+
+/**
+ * ice_virtchnl_set_repr_ops - Switch to representor virtchnl ops
+ * @vf: the VF to switch ops
+ */
+void ice_virtchnl_set_repr_ops(struct ice_vf *vf)
+{
+ vf->virtchnl_ops = &ice_virtchnl_repr_ops;
+}
+
+/**
+ * ice_vc_process_vf_msg - Process request from VF
+ * @pf: pointer to the PF structure
+ * @event: pointer to the AQ event
+ *
+ * Called from the common ASQ/ARQ handler to process a request from a VF.
+ */
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
+{
+ u32 v_opcode = le32_to_cpu(event->desc.cookie_high);
+ s16 vf_id = le16_to_cpu(event->desc.retval);
+ const struct ice_virtchnl_ops *ops;
+ u16 msglen = event->msg_len;
+ u8 *msg = event->msg_buf;
+ struct ice_vf *vf = NULL;
+ struct device *dev;
+ int err = 0;
+
+ dev = ice_pf_to_dev(pf);
+
+ vf = ice_get_vf_by_id(pf, vf_id);
+ if (!vf) {
+ dev_err(dev, "Unable to locate VF for message from VF ID %d, opcode %d, len %d\n",
+ vf_id, v_opcode, msglen);
+ return;
+ }
+
+ mutex_lock(&vf->cfg_lock);
+
+ /* Check if VF is disabled. */
+ if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) {
+ err = -EPERM;
+ goto error_handler;
+ }
+
+ ops = vf->virtchnl_ops;
+
+ /* Perform basic checks on the msg */
+ err = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen);
+ if (err) {
+ if (err == VIRTCHNL_STATUS_ERR_PARAM)
+ err = -EPERM;
+ else
+ err = -EINVAL;
+ }
+
+error_handler:
+ if (err) {
+ ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM,
+ NULL, 0);
+ dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
+ vf_id, v_opcode, msglen, err);
+ goto finish;
+ }
+
+ if (!ice_vc_is_opcode_allowed(vf, v_opcode)) {
+ ice_vc_send_msg_to_vf(vf, v_opcode,
+ VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL,
+ 0);
+ goto finish;
+ }
+
+ switch (v_opcode) {
+ case VIRTCHNL_OP_VERSION:
+ err = ops->get_ver_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_GET_VF_RESOURCES:
+ err = ops->get_vf_res_msg(vf, msg);
+ if (ice_vf_init_vlan_stripping(vf))
+ dev_dbg(dev, "Failed to initialize VLAN stripping for VF %d\n",
+ vf->vf_id);
+ ice_vc_notify_vf_link_state(vf);
+ break;
+ case VIRTCHNL_OP_RESET_VF:
+ ops->reset_vf(vf);
+ break;
+ case VIRTCHNL_OP_ADD_ETH_ADDR:
+ err = ops->add_mac_addr_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DEL_ETH_ADDR:
+ err = ops->del_mac_addr_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+ err = ops->cfg_qs_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ENABLE_QUEUES:
+ err = ops->ena_qs_msg(vf, msg);
+ ice_vc_notify_vf_link_state(vf);
+ break;
+ case VIRTCHNL_OP_DISABLE_QUEUES:
+ err = ops->dis_qs_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_REQUEST_QUEUES:
+ err = ops->request_qs_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+ err = ops->cfg_irq_map_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_CONFIG_RSS_KEY:
+ err = ops->config_rss_key(vf, msg);
+ break;
+ case VIRTCHNL_OP_CONFIG_RSS_LUT:
+ err = ops->config_rss_lut(vf, msg);
+ break;
+ case VIRTCHNL_OP_GET_STATS:
+ err = ops->get_stats_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+ err = ops->cfg_promiscuous_mode_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ADD_VLAN:
+ err = ops->add_vlan_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DEL_VLAN:
+ err = ops->remove_vlan_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+ err = ops->ena_vlan_stripping(vf);
+ break;
+ case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+ err = ops->dis_vlan_stripping(vf);
+ break;
+ case VIRTCHNL_OP_ADD_FDIR_FILTER:
+ err = ops->add_fdir_fltr_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DEL_FDIR_FILTER:
+ err = ops->del_fdir_fltr_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ADD_RSS_CFG:
+ err = ops->handle_rss_cfg_msg(vf, msg, true);
+ break;
+ case VIRTCHNL_OP_DEL_RSS_CFG:
+ err = ops->handle_rss_cfg_msg(vf, msg, false);
+ break;
+ case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
+ err = ops->get_offload_vlan_v2_caps(vf);
+ break;
+ case VIRTCHNL_OP_ADD_VLAN_V2:
+ err = ops->add_vlan_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DEL_VLAN_V2:
+ err = ops->remove_vlan_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2:
+ err = ops->ena_vlan_stripping_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2:
+ err = ops->dis_vlan_stripping_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2:
+ err = ops->ena_vlan_insertion_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2:
+ err = ops->dis_vlan_insertion_v2_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_UNKNOWN:
+ default:
+ dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
+ vf_id);
+ err = ice_vc_send_msg_to_vf(vf, v_opcode,
+ VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
+ NULL, 0);
+ break;
+ }
+ if (err) {
+ /* The opcode handlers are responsible for replying to the VF;
+ * a failure here is only logged, not acted upon.
+ */
+ dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
+ vf_id, v_opcode, err);
+ }
+
+finish:
+ mutex_unlock(&vf->cfg_lock);
+ ice_put_vf(vf);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h
new file mode 100644
index 000000000..b5a3fd8ad
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2022, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_H_
+#define _ICE_VIRTCHNL_H_
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/if_ether.h>
+#include <linux/avf/virtchnl.h>
+#include "ice_vf_lib.h"
+
+/* Restrict the number of MAC addresses and VLANs a non-trusted VF can program */
+#define ICE_MAX_VLAN_PER_VF 8
+
+/* MAC filters: 1 is reserved for the VF's default/perm_addr/LAA MAC, 1 for
+ * broadcast, and 16 for additional unicast/multicast filters
+ */
+#define ICE_MAX_MACADDR_PER_VF 18
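+
+/* Illustrative compile-time check of the budget above (a sketch, not part
+ * of the original driver): 1 default/perm_addr/LAA + 1 broadcast + 16
+ * additional unicast/multicast filters.
+ */
+static_assert(ICE_MAX_MACADDR_PER_VF == 1 + 1 + 16);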
+
+struct ice_virtchnl_ops {
+ int (*get_ver_msg)(struct ice_vf *vf, u8 *msg);
+ int (*get_vf_res_msg)(struct ice_vf *vf, u8 *msg);
+ void (*reset_vf)(struct ice_vf *vf);
+ int (*add_mac_addr_msg)(struct ice_vf *vf, u8 *msg);
+ int (*del_mac_addr_msg)(struct ice_vf *vf, u8 *msg);
+ int (*cfg_qs_msg)(struct ice_vf *vf, u8 *msg);
+ int (*ena_qs_msg)(struct ice_vf *vf, u8 *msg);
+ int (*dis_qs_msg)(struct ice_vf *vf, u8 *msg);
+ int (*request_qs_msg)(struct ice_vf *vf, u8 *msg);
+ int (*cfg_irq_map_msg)(struct ice_vf *vf, u8 *msg);
+ int (*config_rss_key)(struct ice_vf *vf, u8 *msg);
+ int (*config_rss_lut)(struct ice_vf *vf, u8 *msg);
+ int (*get_stats_msg)(struct ice_vf *vf, u8 *msg);
+ int (*cfg_promiscuous_mode_msg)(struct ice_vf *vf, u8 *msg);
+ int (*add_vlan_msg)(struct ice_vf *vf, u8 *msg);
+ int (*remove_vlan_msg)(struct ice_vf *vf, u8 *msg);
+ int (*ena_vlan_stripping)(struct ice_vf *vf);
+ int (*dis_vlan_stripping)(struct ice_vf *vf);
+ int (*handle_rss_cfg_msg)(struct ice_vf *vf, u8 *msg, bool add);
+ int (*add_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg);
+ int (*del_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg);
+ int (*get_offload_vlan_v2_caps)(struct ice_vf *vf);
+ int (*add_vlan_v2_msg)(struct ice_vf *vf, u8 *msg);
+ int (*remove_vlan_v2_msg)(struct ice_vf *vf, u8 *msg);
+ int (*ena_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg);
+ int (*dis_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg);
+ int (*ena_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg);
+ int (*dis_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg);
+};
+
+#ifdef CONFIG_PCI_IOV
+void ice_virtchnl_set_dflt_ops(struct ice_vf *vf);
+void ice_virtchnl_set_repr_ops(struct ice_vf *vf);
+void ice_vc_notify_vf_link_state(struct ice_vf *vf);
+void ice_vc_notify_link_state(struct ice_pf *pf);
+void ice_vc_notify_reset(struct ice_pf *pf);
+int
+ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+ enum virtchnl_status_code v_retval, u8 *msg, u16 msglen);
+bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id);
+#else /* CONFIG_PCI_IOV */
+static inline void ice_virtchnl_set_dflt_ops(struct ice_vf *vf) { }
+static inline void ice_virtchnl_set_repr_ops(struct ice_vf *vf) { }
+static inline void ice_vc_notify_vf_link_state(struct ice_vf *vf) { }
+static inline void ice_vc_notify_link_state(struct ice_pf *pf) { }
+static inline void ice_vc_notify_reset(struct ice_pf *pf) { }
+
+static inline int
+ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+ enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
+{
+ return false;
+}
+#endif /* !CONFIG_PCI_IOV */
+
+#endif /* _ICE_VIRTCHNL_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
new file mode 100644
index 000000000..5a82216e7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice_virtchnl_allowlist.h"
+
+/* The purpose of this file is to share functionality for allowlisting or
+ * denylisting opcodes used in PF <-> VF communication. Groups of opcodes:
+ * - default -> should always be allowed after creating a VF,
+ * default_allowlist_opcodes
+ * - opcodes needed by the VF to work correctly, but not associated with caps ->
+ * should be allowed after successful VF resources allocation,
+ * working_allowlist_opcodes
+ * - opcodes needed by the VF when caps are activated
+ *
+ * Caps that don't use new opcodes (no opcodes should be allowed):
+ * - VIRTCHNL_VF_OFFLOAD_RSS_AQ
+ * - VIRTCHNL_VF_OFFLOAD_RSS_REG
+ * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR
+ * - VIRTCHNL_VF_OFFLOAD_CRC
+ * - VIRTCHNL_VF_OFFLOAD_RX_POLLING
+ * - VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2
+ * - VIRTCHNL_VF_OFFLOAD_ENCAP
+ * - VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM
+ * - VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM
+ * - VIRTCHNL_VF_OFFLOAD_USO
+ */
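+
+/* Lifecycle sketch (illustrative, based on the setters at the bottom of
+ * this file): a freshly created VF gets ice_vc_set_default_allowlist();
+ * after a successful VIRTCHNL_OP_GET_VF_RESOURCES the PF extends the set
+ * with ice_vc_set_working_allowlist() and ice_vc_set_caps_allowlist()
+ * according to the negotiated capabilities.
+ */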
+
+/* default opcodes to communicate with VF */
+static const u32 default_allowlist_opcodes[] = {
+ VIRTCHNL_OP_GET_VF_RESOURCES, VIRTCHNL_OP_VERSION, VIRTCHNL_OP_RESET_VF,
+};
+
+/* opcodes supported after successful VIRTCHNL_OP_GET_VF_RESOURCES */
+static const u32 working_allowlist_opcodes[] = {
+ VIRTCHNL_OP_CONFIG_TX_QUEUE, VIRTCHNL_OP_CONFIG_RX_QUEUE,
+ VIRTCHNL_OP_CONFIG_VSI_QUEUES, VIRTCHNL_OP_CONFIG_IRQ_MAP,
+ VIRTCHNL_OP_ENABLE_QUEUES, VIRTCHNL_OP_DISABLE_QUEUES,
+ VIRTCHNL_OP_GET_STATS, VIRTCHNL_OP_EVENT,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_L2 */
+static const u32 l2_allowlist_opcodes[] = {
+ VIRTCHNL_OP_ADD_ETH_ADDR, VIRTCHNL_OP_DEL_ETH_ADDR,
+ VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_REQ_QUEUES */
+static const u32 req_queues_allowlist_opcodes[] = {
+ VIRTCHNL_OP_REQUEST_QUEUES,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_VLAN */
+static const u32 vlan_allowlist_opcodes[] = {
+ VIRTCHNL_OP_ADD_VLAN, VIRTCHNL_OP_DEL_VLAN,
+ VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_VLAN_V2 */
+static const u32 vlan_v2_allowlist_opcodes[] = {
+ VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS, VIRTCHNL_OP_ADD_VLAN_V2,
+ VIRTCHNL_OP_DEL_VLAN_V2, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2,
+ VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2,
+ VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2,
+ VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_RSS_PF */
+static const u32 rss_pf_allowlist_opcodes[] = {
+ VIRTCHNL_OP_CONFIG_RSS_KEY, VIRTCHNL_OP_CONFIG_RSS_LUT,
+ VIRTCHNL_OP_GET_RSS_HENA_CAPS, VIRTCHNL_OP_SET_RSS_HENA,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF */
+static const u32 adv_rss_pf_allowlist_opcodes[] = {
+ VIRTCHNL_OP_ADD_RSS_CFG, VIRTCHNL_OP_DEL_RSS_CFG,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_FDIR_PF */
+static const u32 fdir_pf_allowlist_opcodes[] = {
+ VIRTCHNL_OP_ADD_FDIR_FILTER, VIRTCHNL_OP_DEL_FDIR_FILTER,
+};
+
+struct allowlist_opcode_info {
+ const u32 *opcodes;
+ size_t size;
+};
+
+#define BIT_INDEX(caps) (HWEIGHT((caps) - 1))
+#define ALLOW_ITEM(caps, list) \
+ [BIT_INDEX(caps)] = { \
+ .opcodes = list, \
+ .size = ARRAY_SIZE(list) \
+ }
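+/* BIT_INDEX() maps a single-bit capability flag to its bit position:
+ * for caps == BIT(n), HWEIGHT(BIT(n) - 1) == n because BIT(n) - 1 has
+ * exactly n bits set. Worked example (illustrative), assuming
+ * VIRTCHNL_VF_OFFLOAD_VLAN is BIT(16): ALLOW_ITEM() places
+ * vlan_allowlist_opcodes at index 16, the same bit tested by
+ * for_each_set_bit() in ice_vc_set_caps_allowlist().
+ */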
+static const struct allowlist_opcode_info allowlist_opcodes[] = {
+ ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_L2, l2_allowlist_opcodes),
+ ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_REQ_QUEUES, req_queues_allowlist_opcodes),
+ ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN, vlan_allowlist_opcodes),
+ ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RSS_PF, rss_pf_allowlist_opcodes),
+ ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF, adv_rss_pf_allowlist_opcodes),
+ ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes),
+ ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN_V2, vlan_v2_allowlist_opcodes),
+};
+
+/**
+ * ice_vc_is_opcode_allowed - check if this opcode is allowed on this VF
+ * @vf: pointer to VF structure
+ * @opcode: virtchnl opcode
+ *
+ * Return: true if the opcode is allowed on this VF, false otherwise.
+ */
+bool ice_vc_is_opcode_allowed(struct ice_vf *vf, u32 opcode)
+{
+ if (opcode >= VIRTCHNL_OP_MAX)
+ return false;
+
+ return test_bit(opcode, vf->opcodes_allowlist);
+}
+
+/**
+ * ice_vc_allowlist_opcodes - allowlist selected opcodes
+ * @vf: pointer to VF structure
+ * @opcodes: array of opcodes to allowlist
+ * @size: size of opcodes array
+ *
+ * Call this function to allowlist the given opcodes for the VF.
+ */
+static void
+ice_vc_allowlist_opcodes(struct ice_vf *vf, const u32 *opcodes, size_t size)
+{
+ unsigned int i;
+
+ for (i = 0; i < size; i++)
+ set_bit(opcodes[i], vf->opcodes_allowlist);
+}
+
+/**
+ * ice_vc_clear_allowlist - clear all allowlist opcodes
+ * @vf: pointer to VF structure
+ */
+static void ice_vc_clear_allowlist(struct ice_vf *vf)
+{
+ bitmap_zero(vf->opcodes_allowlist, VIRTCHNL_OP_MAX);
+}
+
+/**
+ * ice_vc_set_default_allowlist - allowlist default opcodes for VF
+ * @vf: pointer to VF structure
+ */
+void ice_vc_set_default_allowlist(struct ice_vf *vf)
+{
+ ice_vc_clear_allowlist(vf);
+ ice_vc_allowlist_opcodes(vf, default_allowlist_opcodes,
+ ARRAY_SIZE(default_allowlist_opcodes));
+}
+
+/**
+ * ice_vc_set_working_allowlist - allowlist opcodes needed by the VF to work
+ * @vf: pointer to VF structure
+ *
+ * Allowlist opcodes that aren't associated with specific caps, but
+ * are needed by the VF to work.
+ */
+void ice_vc_set_working_allowlist(struct ice_vf *vf)
+{
+ ice_vc_allowlist_opcodes(vf, working_allowlist_opcodes,
+ ARRAY_SIZE(working_allowlist_opcodes));
+}
+
+/**
+ * ice_vc_set_caps_allowlist - allowlist VF opcodes according to caps
+ * @vf: pointer to VF structure
+ */
+void ice_vc_set_caps_allowlist(struct ice_vf *vf)
+{
+ unsigned long caps = vf->driver_caps;
+ unsigned int i;
+
+ for_each_set_bit(i, &caps, ARRAY_SIZE(allowlist_opcodes))
+ ice_vc_allowlist_opcodes(vf, allowlist_opcodes[i].opcodes,
+ allowlist_opcodes[i].size);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h
new file mode 100644
index 000000000..d3ae86ded
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_ALLOWLIST_H_
+#define _ICE_VIRTCHNL_ALLOWLIST_H_
+#include "ice.h"
+
+bool ice_vc_is_opcode_allowed(struct ice_vf *vf, u32 opcode);
+
+void ice_vc_set_default_allowlist(struct ice_vf *vf);
+void ice_vc_set_working_allowlist(struct ice_vf *vf);
+void ice_vc_set_caps_allowlist(struct ice_vf *vf);
+#endif /* _ICE_VIRTCHNL_ALLOWLIST_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
new file mode 100644
index 000000000..7f7260407
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -0,0 +1,2029 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_flow.h"
+#include "ice_vf_lib_private.h"
+
+#define to_fltr_conf_from_desc(p) \
+ container_of(p, struct virtchnl_fdir_fltr_conf, input)
+
+#define ICE_FLOW_PROF_TYPE_S 0
+#define ICE_FLOW_PROF_TYPE_M (0xFFFFFFFFULL << ICE_FLOW_PROF_TYPE_S)
+#define ICE_FLOW_PROF_VSI_S 32
+#define ICE_FLOW_PROF_VSI_M (0xFFFFFFFFULL << ICE_FLOW_PROF_VSI_S)
+
+/* Flow profile ID format:
+ * [0:31] - flow type (flow + tun_offs)
+ * [32:63] - VSI index
+ */
+#define ICE_FLOW_PROF_FD(vsi, flow, tun_offs) \
+ ((u64)(((((flow) + (tun_offs)) & ICE_FLOW_PROF_TYPE_M)) | \
+ (((u64)(vsi) << ICE_FLOW_PROF_VSI_S) & ICE_FLOW_PROF_VSI_M)))
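+
+/* Worked example (illustrative): for VSI index 5, a non-tunnel filter of
+ * flow type ICE_FLTR_PTYPE_NONF_IPV4_TCP gets
+ * ICE_FLOW_PROF_FD(5, flow, 0) == ((u64)5 << 32) | flow, i.e. the VSI in
+ * bits [32:63] and the flow type in bits [0:31].
+ */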
+
+#define GTPU_TEID_OFFSET 4
+#define GTPU_EH_QFI_OFFSET 1
+#define GTPU_EH_QFI_MASK 0x3F
+#define PFCP_S_OFFSET 0
+#define PFCP_S_MASK 0x1
+#define PFCP_PORT_NR 8805
+
+#define FDIR_INSET_FLAG_ESP_S 0
+#define FDIR_INSET_FLAG_ESP_M BIT_ULL(FDIR_INSET_FLAG_ESP_S)
+#define FDIR_INSET_FLAG_ESP_UDP BIT_ULL(FDIR_INSET_FLAG_ESP_S)
+#define FDIR_INSET_FLAG_ESP_IPSEC (0ULL << FDIR_INSET_FLAG_ESP_S)
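+
+/* Illustrative: when the pattern carries ESP over UDP (NAT-T), the parser
+ * sets FDIR_INSET_FLAG_ESP_UDP in conf->inset_flag, which makes the
+ * VIRTCHNL_PROTO_HDR_ESP_SPI entry in fdir_inset_map[] below resolve to
+ * ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI rather than ICE_FLOW_FIELD_IDX_ESP_SPI.
+ */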
+
+enum ice_fdir_tunnel_type {
+ ICE_FDIR_TUNNEL_TYPE_NONE = 0,
+ ICE_FDIR_TUNNEL_TYPE_GTPU,
+ ICE_FDIR_TUNNEL_TYPE_GTPU_EH,
+};
+
+struct virtchnl_fdir_fltr_conf {
+ struct ice_fdir_fltr input;
+ enum ice_fdir_tunnel_type ttype;
+ u64 inset_flag;
+ u32 flow_id;
+};
+
+struct virtchnl_fdir_inset_map {
+ enum virtchnl_proto_hdr_field field;
+ enum ice_flow_field fld;
+ u64 flag;
+ u64 mask;
+};
+
+static const struct virtchnl_fdir_inset_map fdir_inset_map[] = {
+ {VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE, ICE_FLOW_FIELD_IDX_ETH_TYPE, 0, 0},
+ {VIRTCHNL_PROTO_HDR_IPV4_SRC, ICE_FLOW_FIELD_IDX_IPV4_SA, 0, 0},
+ {VIRTCHNL_PROTO_HDR_IPV4_DST, ICE_FLOW_FIELD_IDX_IPV4_DA, 0, 0},
+ {VIRTCHNL_PROTO_HDR_IPV4_DSCP, ICE_FLOW_FIELD_IDX_IPV4_DSCP, 0, 0},
+ {VIRTCHNL_PROTO_HDR_IPV4_TTL, ICE_FLOW_FIELD_IDX_IPV4_TTL, 0, 0},
+ {VIRTCHNL_PROTO_HDR_IPV4_PROT, ICE_FLOW_FIELD_IDX_IPV4_PROT, 0, 0},
+ {VIRTCHNL_PROTO_HDR_IPV6_SRC, ICE_FLOW_FIELD_IDX_IPV6_SA, 0, 0},
+ {VIRTCHNL_PROTO_HDR_IPV6_DST, ICE_FLOW_FIELD_IDX_IPV6_DA, 0, 0},
+ {VIRTCHNL_PROTO_HDR_IPV6_TC, ICE_FLOW_FIELD_IDX_IPV6_DSCP, 0, 0},
+ {VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, ICE_FLOW_FIELD_IDX_IPV6_TTL, 0, 0},
+ {VIRTCHNL_PROTO_HDR_IPV6_PROT, ICE_FLOW_FIELD_IDX_IPV6_PROT, 0, 0},
+ {VIRTCHNL_PROTO_HDR_UDP_SRC_PORT, ICE_FLOW_FIELD_IDX_UDP_SRC_PORT, 0, 0},
+ {VIRTCHNL_PROTO_HDR_UDP_DST_PORT, ICE_FLOW_FIELD_IDX_UDP_DST_PORT, 0, 0},
+ {VIRTCHNL_PROTO_HDR_TCP_SRC_PORT, ICE_FLOW_FIELD_IDX_TCP_SRC_PORT, 0, 0},
+ {VIRTCHNL_PROTO_HDR_TCP_DST_PORT, ICE_FLOW_FIELD_IDX_TCP_DST_PORT, 0, 0},
+ {VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, 0, 0},
+ {VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, 0, 0},
+ {VIRTCHNL_PROTO_HDR_GTPU_IP_TEID, ICE_FLOW_FIELD_IDX_GTPU_IP_TEID, 0, 0},
+ {VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, ICE_FLOW_FIELD_IDX_GTPU_EH_QFI, 0, 0},
+ {VIRTCHNL_PROTO_HDR_ESP_SPI, ICE_FLOW_FIELD_IDX_ESP_SPI,
+ FDIR_INSET_FLAG_ESP_IPSEC, FDIR_INSET_FLAG_ESP_M},
+ {VIRTCHNL_PROTO_HDR_ESP_SPI, ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI,
+ FDIR_INSET_FLAG_ESP_UDP, FDIR_INSET_FLAG_ESP_M},
+ {VIRTCHNL_PROTO_HDR_AH_SPI, ICE_FLOW_FIELD_IDX_AH_SPI, 0, 0},
+ {VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID, ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID, 0, 0},
+ {VIRTCHNL_PROTO_HDR_PFCP_S_FIELD, ICE_FLOW_FIELD_IDX_UDP_DST_PORT, 0, 0},
+};
+
+/**
+ * ice_vc_fdir_param_check
+ * @vf: pointer to the VF structure
+ * @vsi_id: VF relative VSI ID
+ *
+ * Check for the valid VSI ID, PF's state and VF's state
+ *
+ * Return: 0 on success, and -EINVAL on error.
+ */
+static int
+ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id)
+{
+ struct ice_pf *pf = vf->pf;
+
+ if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+ return -EINVAL;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+ return -EINVAL;
+
+ if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF))
+ return -EINVAL;
+
+ if (vsi_id != vf->lan_vsi_num)
+ return -EINVAL;
+
+ if (!ice_vc_isvalid_vsi_id(vf, vsi_id))
+ return -EINVAL;
+
+ if (!pf->vsi[vf->lan_vsi_idx])
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * ice_vf_start_ctrl_vsi
+ * @vf: pointer to the VF structure
+ *
+ * Allocate the control VSI on first use and open its port for the VF
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int ice_vf_start_ctrl_vsi(struct ice_vf *vf)
+{
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *ctrl_vsi;
+ struct device *dev;
+ int err;
+
+ dev = ice_pf_to_dev(pf);
+ if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+ return -EEXIST;
+
+ ctrl_vsi = ice_vf_ctrl_vsi_setup(vf);
+ if (!ctrl_vsi) {
+ dev_dbg(dev, "Could not setup control VSI for VF %d\n",
+ vf->vf_id);
+ return -ENOMEM;
+ }
+
+ err = ice_vsi_open_ctrl(ctrl_vsi);
+ if (err) {
+ dev_dbg(dev, "Could not open control VSI for VF %d\n",
+ vf->vf_id);
+ goto err_vsi_open;
+ }
+
+ return 0;
+
+err_vsi_open:
+ ice_vsi_release(ctrl_vsi);
+ if (vf->ctrl_vsi_idx != ICE_NO_VSI) {
+ pf->vsi[vf->ctrl_vsi_idx] = NULL;
+ vf->ctrl_vsi_idx = ICE_NO_VSI;
+ }
+ return err;
+}
+
+/**
+ * ice_vc_fdir_alloc_prof - allocate profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_alloc_prof(struct ice_vf *vf, enum ice_fltr_ptype flow)
+{
+ struct ice_vf_fdir *fdir = &vf->fdir;
+
+ if (!fdir->fdir_prof) {
+ fdir->fdir_prof = devm_kcalloc(ice_pf_to_dev(vf->pf),
+ ICE_FLTR_PTYPE_MAX,
+ sizeof(*fdir->fdir_prof),
+ GFP_KERNEL);
+ if (!fdir->fdir_prof)
+ return -ENOMEM;
+ }
+
+ if (!fdir->fdir_prof[flow]) {
+ fdir->fdir_prof[flow] = devm_kzalloc(ice_pf_to_dev(vf->pf),
+ sizeof(**fdir->fdir_prof),
+ GFP_KERNEL);
+ if (!fdir->fdir_prof[flow])
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_fdir_free_prof - free profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ */
+static void
+ice_vc_fdir_free_prof(struct ice_vf *vf, enum ice_fltr_ptype flow)
+{
+ struct ice_vf_fdir *fdir = &vf->fdir;
+
+ if (!fdir->fdir_prof)
+ return;
+
+ if (!fdir->fdir_prof[flow])
+ return;
+
+ devm_kfree(ice_pf_to_dev(vf->pf), fdir->fdir_prof[flow]);
+ fdir->fdir_prof[flow] = NULL;
+}
+
+/**
+ * ice_vc_fdir_free_prof_all - free all the profile for this VF
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_fdir_free_prof_all(struct ice_vf *vf)
+{
+ struct ice_vf_fdir *fdir = &vf->fdir;
+ enum ice_fltr_ptype flow;
+
+ if (!fdir->fdir_prof)
+ return;
+
+ for (flow = ICE_FLTR_PTYPE_NONF_NONE; flow < ICE_FLTR_PTYPE_MAX; flow++)
+ ice_vc_fdir_free_prof(vf, flow);
+
+ devm_kfree(ice_pf_to_dev(vf->pf), fdir->fdir_prof);
+ fdir->fdir_prof = NULL;
+}
+
+/**
+ * ice_vc_fdir_parse_flow_fld
+ * @proto_hdr: virtual channel protocol filter header
+ * @conf: FDIR configuration for each filter
+ * @fld: field type array
+ * @fld_cnt: field counter
+ *
+ * Parse the virtual channel filter header and store the matched fields in
+ * the field type array
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_flow_fld(struct virtchnl_proto_hdr *proto_hdr,
+ struct virtchnl_fdir_fltr_conf *conf,
+ enum ice_flow_field *fld, int *fld_cnt)
+{
+ struct virtchnl_proto_hdr hdr;
+ u32 i;
+
+ memcpy(&hdr, proto_hdr, sizeof(hdr));
+
+ for (i = 0; (i < ARRAY_SIZE(fdir_inset_map)) &&
+ VIRTCHNL_GET_PROTO_HDR_FIELD(&hdr); i++)
+ if (VIRTCHNL_TEST_PROTO_HDR(&hdr, fdir_inset_map[i].field)) {
+ if (fdir_inset_map[i].mask &&
+ ((fdir_inset_map[i].mask & conf->inset_flag) !=
+ fdir_inset_map[i].flag))
+ continue;
+
+ fld[*fld_cnt] = fdir_inset_map[i].fld;
+ *fld_cnt += 1;
+ if (*fld_cnt >= ICE_FLOW_FIELD_IDX_MAX)
+ return -EINVAL;
+ VIRTCHNL_DEL_PROTO_HDR_FIELD(&hdr,
+ fdir_inset_map[i].field);
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_fdir_set_flow_fld
+ * @vf: pointer to the VF structure
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ * @seg: array of one or more packet segments that describe the flow
+ *
+ * Parse the virtual channel add msg buffer's field vector and store the
+ * fields in the flow's packet segment info
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_flow_fld(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+ struct virtchnl_fdir_fltr_conf *conf,
+ struct ice_flow_seg_info *seg)
+{
+ struct virtchnl_fdir_rule *rule = &fltr->rule_cfg;
+ enum ice_flow_field fld[ICE_FLOW_FIELD_IDX_MAX];
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct virtchnl_proto_hdrs *proto;
+ int fld_cnt = 0;
+ int i;
+
+ proto = &rule->proto_hdrs;
+ for (i = 0; i < proto->count; i++) {
+ struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i];
+ int ret;
+
+ ret = ice_vc_fdir_parse_flow_fld(hdr, conf, fld, &fld_cnt);
+ if (ret)
+ return ret;
+ }
+
+ if (fld_cnt == 0) {
+ dev_dbg(dev, "Empty input set for VF %d\n", vf->vf_id);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < fld_cnt; i++)
+ ice_flow_set_fld(seg, fld[i],
+ ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL,
+ ICE_FLOW_FLD_OFF_INVAL, false);
+
+ return 0;
+}
+
+/**
+ * ice_vc_fdir_set_flow_hdr - config the flow's packet segment header
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ * @seg: array of one or more packet segments that describe the flow
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_flow_hdr(struct ice_vf *vf,
+ struct virtchnl_fdir_fltr_conf *conf,
+ struct ice_flow_seg_info *seg)
+{
+ enum ice_fltr_ptype flow = conf->input.flow_type;
+ enum ice_fdir_tunnel_type ttype = conf->ttype;
+ struct device *dev = ice_pf_to_dev(vf->pf);
+
+ switch (flow) {
+ case ICE_FLTR_PTYPE_NON_IP_L2:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ETH_NON_IP);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_L2TPV3 |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_ESP:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ESP |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_AH:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_AH |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_NAT_T_ESP |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_NODE |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_SESSION |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_TCP |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_UDP |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP:
+ case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP:
+ case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP:
+ case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER:
+ if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU) {
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_IP |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ } else if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU_EH) {
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_EH |
+ ICE_FLOW_SEG_HDR_GTPU_IP |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ } else {
+ dev_dbg(dev, "Invalid tunnel type 0x%x for VF %d\n",
+ ttype, vf->vf_id);
+ return -EINVAL;
+ }
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP |
+ ICE_FLOW_SEG_HDR_IPV4 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_L2TPV3 |
+ ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_ESP:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ESP |
+ ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_AH:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_AH |
+ ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_NAT_T_ESP |
+ ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_NODE |
+ ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_SESSION |
+ ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_TCP |
+ ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_UDP |
+ ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP |
+ ICE_FLOW_SEG_HDR_IPV6 |
+ ICE_FLOW_SEG_HDR_IPV_OTHER);
+ break;
+ default:
+ dev_dbg(dev, "Invalid flow type 0x%x for VF %d failed\n",
+ flow, vf->vf_id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_fdir_rem_prof - remove profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ */
+static void
+ice_vc_fdir_rem_prof(struct ice_vf *vf, enum ice_fltr_ptype flow, int tun)
+{
+ struct ice_vf_fdir *fdir = &vf->fdir;
+ struct ice_fd_hw_prof *vf_prof;
+ struct ice_pf *pf = vf->pf;
+ struct ice_vsi *vf_vsi;
+ struct device *dev;
+ struct ice_hw *hw;
+ u64 prof_id;
+ int i;
+
+ dev = ice_pf_to_dev(pf);
+ hw = &pf->hw;
+ if (!fdir->fdir_prof || !fdir->fdir_prof[flow])
+ return;
+
+ vf_prof = fdir->fdir_prof[flow];
+
+ vf_vsi = pf->vsi[vf->lan_vsi_idx];
+ if (!vf_vsi) {
+ dev_dbg(dev, "NULL vf %d vsi pointer\n", vf->vf_id);
+ return;
+ }
+
+ if (!fdir->prof_entry_cnt[flow][tun])
+ return;
+
+ prof_id = ICE_FLOW_PROF_FD(vf_vsi->vsi_num,
+ flow, tun ? ICE_FLTR_PTYPE_MAX : 0);
+
+ for (i = 0; i < fdir->prof_entry_cnt[flow][tun]; i++)
+ if (vf_prof->entry_h[i][tun]) {
+ u16 vsi_num = ice_get_hw_vsi_num(hw, vf_prof->vsi_h[i]);
+
+ ice_rem_prof_id_flow(hw, ICE_BLK_FD, vsi_num, prof_id);
+ ice_flow_rem_entry(hw, ICE_BLK_FD,
+ vf_prof->entry_h[i][tun]);
+ vf_prof->entry_h[i][tun] = 0;
+ }
+
+ ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+ devm_kfree(dev, vf_prof->fdir_seg[tun]);
+ vf_prof->fdir_seg[tun] = NULL;
+
+ for (i = 0; i < vf_prof->cnt; i++)
+ vf_prof->vsi_h[i] = 0;
+
+ fdir->prof_entry_cnt[flow][tun] = 0;
+}
+
+/**
+ * ice_vc_fdir_rem_prof_all - remove profile for this VF
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf)
+{
+ enum ice_fltr_ptype flow;
+
+ for (flow = ICE_FLTR_PTYPE_NONF_NONE;
+ flow < ICE_FLTR_PTYPE_MAX; flow++) {
+ ice_vc_fdir_rem_prof(vf, flow, 0);
+ ice_vc_fdir_rem_prof(vf, flow, 1);
+ }
+}
+
+/**
+ * ice_vc_fdir_reset_cnt_all - reset all FDIR counters for this VF FDIR
+ * @fdir: pointer to the VF FDIR structure
+ */
+static void ice_vc_fdir_reset_cnt_all(struct ice_vf_fdir *fdir)
+{
+ enum ice_fltr_ptype flow;
+
+ for (flow = ICE_FLTR_PTYPE_NONF_NONE;
+ flow < ICE_FLTR_PTYPE_MAX; flow++) {
+ fdir->fdir_fltr_cnt[flow][0] = 0;
+ fdir->fdir_fltr_cnt[flow][1] = 0;
+ }
+}
+
+/**
+ * ice_vc_fdir_has_prof_conflict
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ *
+ * Check if @conf has a profile that conflicts with existing profiles
+ *
+ * Return: true if a conflict is found, false otherwise.
+ */
+static bool
+ice_vc_fdir_has_prof_conflict(struct ice_vf *vf,
+ struct virtchnl_fdir_fltr_conf *conf)
+{
+ struct ice_fdir_fltr *desc;
+
+ list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) {
+ struct virtchnl_fdir_fltr_conf *existing_conf;
+ enum ice_fltr_ptype flow_type_a, flow_type_b;
+ struct ice_fdir_fltr *a, *b;
+
+ existing_conf = to_fltr_conf_from_desc(desc);
+ a = &existing_conf->input;
+ b = &conf->input;
+ flow_type_a = a->flow_type;
+ flow_type_b = b->flow_type;
+
+ /* No need to compare two rules with different tunnel types or
+ * with the same protocol type.
+ */
+ if (existing_conf->ttype != conf->ttype ||
+ flow_type_a == flow_type_b)
+ continue;
+
+ switch (flow_type_a) {
+ case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+ case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+ case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
+ return true;
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_SCTP)
+ return true;
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+ case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+ case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_OTHER)
+ return true;
+ break;
+ case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_UDP ||
+ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
+ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_SCTP)
+ return true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return false;
+}
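+
+/* Illustrative conflict pair: an IPV4_OTHER profile matches any IPv4
+ * protocol, so it overlaps an IPV4_UDP/TCP/SCTP profile on the same
+ * tunnel type; ice_vc_fdir_has_prof_conflict() rejects such combinations.
+ */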
+
+/**
+ * ice_vc_fdir_write_flow_prof
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ * @seg: array of one or more packet segments that describe the flow
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ *
+ * Write the flow's profile config and packet segment into the hardware
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_write_flow_prof(struct ice_vf *vf, enum ice_fltr_ptype flow,
+ struct ice_flow_seg_info *seg, int tun)
+{
+ struct ice_vf_fdir *fdir = &vf->fdir;
+ struct ice_vsi *vf_vsi, *ctrl_vsi;
+ struct ice_flow_seg_info *old_seg;
+ struct ice_flow_prof *prof = NULL;
+ struct ice_fd_hw_prof *vf_prof;
+ struct device *dev;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ u64 entry1_h = 0;
+ u64 entry2_h = 0;
+ u64 prof_id;
+ int ret;
+
+ pf = vf->pf;
+ dev = ice_pf_to_dev(pf);
+ hw = &pf->hw;
+ vf_vsi = pf->vsi[vf->lan_vsi_idx];
+ if (!vf_vsi)
+ return -EINVAL;
+
+ ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
+ if (!ctrl_vsi)
+ return -EINVAL;
+
+ vf_prof = fdir->fdir_prof[flow];
+ old_seg = vf_prof->fdir_seg[tun];
+ if (old_seg) {
+ if (!memcmp(old_seg, seg, sizeof(*seg))) {
+ dev_dbg(dev, "Duplicated profile for VF %d!\n",
+ vf->vf_id);
+ return -EEXIST;
+ }
+
+ if (fdir->fdir_fltr_cnt[flow][tun]) {
+ ret = -EINVAL;
+ dev_dbg(dev, "Input set conflicts for VF %d\n",
+ vf->vf_id);
+ goto err_exit;
+ }
+
+ /* remove previously allocated profile */
+ ice_vc_fdir_rem_prof(vf, flow, tun);
+ }
+
+ prof_id = ICE_FLOW_PROF_FD(vf_vsi->vsi_num, flow,
+ tun ? ICE_FLTR_PTYPE_MAX : 0);
+
+ ret = ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id, seg,
+ tun + 1, &prof);
+ if (ret) {
+ dev_dbg(dev, "Could not add VSI flow 0x%x for VF %d\n",
+ flow, vf->vf_id);
+ goto err_exit;
+ }
+
+ ret = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, vf_vsi->idx,
+ vf_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+ seg, &entry1_h);
+ if (ret) {
+ dev_dbg(dev, "Could not add flow 0x%x VSI entry for VF %d\n",
+ flow, vf->vf_id);
+ goto err_prof;
+ }
+
+ ret = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, vf_vsi->idx,
+ ctrl_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+ seg, &entry2_h);
+ if (ret) {
+ dev_dbg(dev,
+ "Could not add flow 0x%x Ctrl VSI entry for VF %d\n",
+ flow, vf->vf_id);
+ goto err_entry_1;
+ }
+
+ vf_prof->fdir_seg[tun] = seg;
+ vf_prof->cnt = 0;
+ fdir->prof_entry_cnt[flow][tun] = 0;
+
+ vf_prof->entry_h[vf_prof->cnt][tun] = entry1_h;
+ vf_prof->vsi_h[vf_prof->cnt] = vf_vsi->idx;
+ vf_prof->cnt++;
+ fdir->prof_entry_cnt[flow][tun]++;
+
+ vf_prof->entry_h[vf_prof->cnt][tun] = entry2_h;
+ vf_prof->vsi_h[vf_prof->cnt] = ctrl_vsi->idx;
+ vf_prof->cnt++;
+ fdir->prof_entry_cnt[flow][tun]++;
+
+ return 0;
+
+err_entry_1:
+ ice_rem_prof_id_flow(hw, ICE_BLK_FD,
+ ice_get_hw_vsi_num(hw, vf_vsi->idx), prof_id);
+ ice_flow_rem_entry(hw, ICE_BLK_FD, entry1_h);
+err_prof:
+ ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+err_exit:
+ return ret;
+}
+
+/**
+ * ice_vc_fdir_config_input_set
+ * @vf: pointer to the VF structure
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ *
+ * Configure the input set type and value from the virtual channel add msg
+ * buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_config_input_set(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+ struct virtchnl_fdir_fltr_conf *conf, int tun)
+{
+ struct ice_fdir_fltr *input = &conf->input;
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_flow_seg_info *seg;
+ enum ice_fltr_ptype flow;
+ int ret;
+
+ ret = ice_vc_fdir_has_prof_conflict(vf, conf);
+ if (ret) {
+ dev_dbg(dev, "Found flow profile conflict for VF %d\n",
+ vf->vf_id);
+ return ret;
+ }
+
+ flow = input->flow_type;
+ ret = ice_vc_fdir_alloc_prof(vf, flow);
+ if (ret) {
+ dev_dbg(dev, "Alloc flow prof for VF %d failed\n", vf->vf_id);
+ return ret;
+ }
+
+ seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+ if (!seg)
+ return -ENOMEM;
+
+ ret = ice_vc_fdir_set_flow_fld(vf, fltr, conf, seg);
+ if (ret) {
+ dev_dbg(dev, "Set flow field for VF %d failed\n", vf->vf_id);
+ goto err_exit;
+ }
+
+ ret = ice_vc_fdir_set_flow_hdr(vf, conf, seg);
+ if (ret) {
+ dev_dbg(dev, "Set flow hdr for VF %d failed\n", vf->vf_id);
+ goto err_exit;
+ }
+
+ ret = ice_vc_fdir_write_flow_prof(vf, flow, seg, tun);
+ if (ret == -EEXIST) {
+ devm_kfree(dev, seg);
+ } else if (ret) {
+ dev_dbg(dev, "Write flow profile for VF %d failed\n",
+ vf->vf_id);
+ goto err_exit;
+ }
+
+ return 0;
+
+err_exit:
+ devm_kfree(dev, seg);
+ return ret;
+}
+
+/**
+ * ice_vc_fdir_parse_pattern
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Parse the virtual channel filter's pattern and store it in @conf
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+ struct virtchnl_fdir_fltr_conf *conf)
+{
+ struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs;
+ enum virtchnl_proto_hdr_type l3 = VIRTCHNL_PROTO_HDR_NONE;
+ enum virtchnl_proto_hdr_type l4 = VIRTCHNL_PROTO_HDR_NONE;
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_fdir_fltr *input = &conf->input;
+ int i;
+
+ if (proto->count > VIRTCHNL_MAX_NUM_PROTO_HDRS) {
+ dev_dbg(dev, "Invalid protocol count:0x%x for VF %d\n",
+ proto->count, vf->vf_id);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < proto->count; i++) {
+ struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i];
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah;
+ struct sctphdr *sctph;
+ struct ipv6hdr *ip6h;
+ struct udphdr *udph;
+ struct tcphdr *tcph;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ u8 s_field;
+ u8 *rawh;
+
+ switch (hdr->type) {
+ case VIRTCHNL_PROTO_HDR_ETH:
+ eth = (struct ethhdr *)hdr->buffer;
+ input->flow_type = ICE_FLTR_PTYPE_NON_IP_L2;
+
+ if (hdr->field_selector)
+ input->ext_data.ether_type = eth->h_proto;
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV4:
+ iph = (struct iphdr *)hdr->buffer;
+ l3 = VIRTCHNL_PROTO_HDR_IPV4;
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+
+ if (hdr->field_selector) {
+ input->ip.v4.src_ip = iph->saddr;
+ input->ip.v4.dst_ip = iph->daddr;
+ input->ip.v4.tos = iph->tos;
+ input->ip.v4.proto = iph->protocol;
+ }
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV6:
+ ip6h = (struct ipv6hdr *)hdr->buffer;
+ l3 = VIRTCHNL_PROTO_HDR_IPV6;
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+
+ if (hdr->field_selector) {
+ memcpy(input->ip.v6.src_ip,
+ ip6h->saddr.in6_u.u6_addr8,
+ sizeof(ip6h->saddr));
+ memcpy(input->ip.v6.dst_ip,
+ ip6h->daddr.in6_u.u6_addr8,
+ sizeof(ip6h->daddr));
+ input->ip.v6.tc = ((u8)(ip6h->priority) << 4) |
+ (ip6h->flow_lbl[0] >> 4);
+ input->ip.v6.proto = ip6h->nexthdr;
+ }
+ break;
+ case VIRTCHNL_PROTO_HDR_TCP:
+ tcph = (struct tcphdr *)hdr->buffer;
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+
+ if (hdr->field_selector) {
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+ input->ip.v4.src_port = tcph->source;
+ input->ip.v4.dst_port = tcph->dest;
+ } else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+ input->ip.v6.src_port = tcph->source;
+ input->ip.v6.dst_port = tcph->dest;
+ }
+ }
+ break;
+ case VIRTCHNL_PROTO_HDR_UDP:
+ udph = (struct udphdr *)hdr->buffer;
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+
+ if (hdr->field_selector) {
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+ input->ip.v4.src_port = udph->source;
+ input->ip.v4.dst_port = udph->dest;
+ } else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+ input->ip.v6.src_port = udph->source;
+ input->ip.v6.dst_port = udph->dest;
+ }
+ }
+ break;
+ case VIRTCHNL_PROTO_HDR_SCTP:
+ sctph = (struct sctphdr *)hdr->buffer;
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+ input->flow_type =
+ ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+ input->flow_type =
+ ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+
+ if (hdr->field_selector) {
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+ input->ip.v4.src_port = sctph->source;
+ input->ip.v4.dst_port = sctph->dest;
+ } else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+ input->ip.v6.src_port = sctph->source;
+ input->ip.v6.dst_port = sctph->dest;
+ }
+ }
+ break;
+ case VIRTCHNL_PROTO_HDR_L2TPV3:
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3;
+
+ if (hdr->field_selector)
+ input->l2tpv3_data.session_id = *((__be32 *)hdr->buffer);
+ break;
+ case VIRTCHNL_PROTO_HDR_ESP:
+ esph = (struct ip_esp_hdr *)hdr->buffer;
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4 &&
+ l4 == VIRTCHNL_PROTO_HDR_UDP)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 &&
+ l4 == VIRTCHNL_PROTO_HDR_UDP)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV4 &&
+ l4 == VIRTCHNL_PROTO_HDR_NONE)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_ESP;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 &&
+ l4 == VIRTCHNL_PROTO_HDR_NONE)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_ESP;
+
+ if (l4 == VIRTCHNL_PROTO_HDR_UDP)
+ conf->inset_flag |= FDIR_INSET_FLAG_ESP_UDP;
+ else
+ conf->inset_flag |= FDIR_INSET_FLAG_ESP_IPSEC;
+
+ if (hdr->field_selector) {
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+ input->ip.v4.sec_parm_idx = esph->spi;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+ input->ip.v6.sec_parm_idx = esph->spi;
+ }
+ break;
+ case VIRTCHNL_PROTO_HDR_AH:
+ ah = (struct ip_auth_hdr *)hdr->buffer;
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_AH;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_AH;
+
+ if (hdr->field_selector) {
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+ input->ip.v4.sec_parm_idx = ah->spi;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+ input->ip.v6.sec_parm_idx = ah->spi;
+ }
+ break;
+ case VIRTCHNL_PROTO_HDR_PFCP:
+ rawh = (u8 *)hdr->buffer;
+ s_field = (rawh[0] >> PFCP_S_OFFSET) & PFCP_S_MASK;
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4 && s_field == 0)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV4 && s_field == 1)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 && s_field == 0)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE;
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 && s_field == 1)
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION;
+
+ if (hdr->field_selector) {
+ if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+ input->ip.v4.dst_port = cpu_to_be16(PFCP_PORT_NR);
+ else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+ input->ip.v6.dst_port = cpu_to_be16(PFCP_PORT_NR);
+ }
+ break;
+ case VIRTCHNL_PROTO_HDR_GTPU_IP:
+ rawh = (u8 *)hdr->buffer;
+ input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER;
+
+ if (hdr->field_selector)
+ input->gtpu_data.teid = *(__be32 *)(&rawh[GTPU_TEID_OFFSET]);
+ conf->ttype = ICE_FDIR_TUNNEL_TYPE_GTPU;
+ break;
+ case VIRTCHNL_PROTO_HDR_GTPU_EH:
+ rawh = (u8 *)hdr->buffer;
+
+ if (hdr->field_selector)
+ input->gtpu_data.qfi = rawh[GTPU_EH_QFI_OFFSET] & GTPU_EH_QFI_MASK;
+ conf->ttype = ICE_FDIR_TUNNEL_TYPE_GTPU_EH;
+ break;
+ default:
+ dev_dbg(dev, "Invalid header type 0x:%x for VF %d\n",
+ hdr->type, vf->vf_id);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_fdir_parse_action
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Parse the virtual channel filter's action and store it in @conf
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_action(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+ struct virtchnl_fdir_fltr_conf *conf)
+{
+ struct virtchnl_filter_action_set *as = &fltr->rule_cfg.action_set;
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_fdir_fltr *input = &conf->input;
+ u32 dest_num = 0;
+ u32 mark_num = 0;
+ int i;
+
+ if (as->count > VIRTCHNL_MAX_NUM_ACTIONS) {
+ dev_dbg(dev, "Invalid action numbers:0x%x for VF %d\n",
+ as->count, vf->vf_id);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < as->count; i++) {
+ struct virtchnl_filter_action *action = &as->actions[i];
+
+ switch (action->type) {
+ case VIRTCHNL_ACTION_PASSTHRU:
+ dest_num++;
+ input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER;
+ break;
+ case VIRTCHNL_ACTION_DROP:
+ dest_num++;
+ input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DROP_PKT;
+ break;
+ case VIRTCHNL_ACTION_QUEUE:
+ dest_num++;
+ input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+ input->q_index = action->act_conf.queue.index;
+ break;
+ case VIRTCHNL_ACTION_Q_REGION:
+ dest_num++;
+ input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP;
+ input->q_index = action->act_conf.queue.index;
+ input->q_region = action->act_conf.queue.region;
+ break;
+ case VIRTCHNL_ACTION_MARK:
+ mark_num++;
+ input->fltr_id = action->act_conf.mark_id;
+ input->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_THREE;
+ break;
+ default:
+ dev_dbg(dev, "Invalid action type:0x%x for VF %d\n",
+ action->type, vf->vf_id);
+ return -EINVAL;
+ }
+ }
+
+ if (dest_num == 0 || dest_num >= 2) {
+ dev_dbg(dev, "Invalid destination action for VF %d\n",
+ vf->vf_id);
+ return -EINVAL;
+ }
+
+ if (mark_num >= 2) {
+ dev_dbg(dev, "Too many mark actions for VF %d\n", vf->vf_id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_vc_validate_fdir_fltr - validate the virtual channel filter
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_validate_fdir_fltr(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+ struct virtchnl_fdir_fltr_conf *conf)
+{
+ struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs;
+ int ret;
+
+ if (!ice_vc_validate_pattern(vf, proto))
+ return -EINVAL;
+
+ ret = ice_vc_fdir_parse_pattern(vf, fltr, conf);
+ if (ret)
+ return ret;
+
+ return ice_vc_fdir_parse_action(vf, fltr, conf);
+}
+
+/**
+ * ice_vc_fdir_comp_rules - check if two filter rules have the same value
+ * @conf_a: FDIR configuration for filter a
+ * @conf_b: FDIR configuration for filter b
+ *
+ * Return: true if the two filter rules match, false otherwise.
+ */
+static bool
+ice_vc_fdir_comp_rules(struct virtchnl_fdir_fltr_conf *conf_a,
+ struct virtchnl_fdir_fltr_conf *conf_b)
+{
+ struct ice_fdir_fltr *a = &conf_a->input;
+ struct ice_fdir_fltr *b = &conf_b->input;
+
+ if (conf_a->ttype != conf_b->ttype)
+ return false;
+ if (a->flow_type != b->flow_type)
+ return false;
+ if (memcmp(&a->ip, &b->ip, sizeof(a->ip)))
+ return false;
+ if (memcmp(&a->mask, &b->mask, sizeof(a->mask)))
+ return false;
+ if (memcmp(&a->gtpu_data, &b->gtpu_data, sizeof(a->gtpu_data)))
+ return false;
+ if (memcmp(&a->gtpu_mask, &b->gtpu_mask, sizeof(a->gtpu_mask)))
+ return false;
+ if (memcmp(&a->l2tpv3_data, &b->l2tpv3_data, sizeof(a->l2tpv3_data)))
+ return false;
+ if (memcmp(&a->l2tpv3_mask, &b->l2tpv3_mask, sizeof(a->l2tpv3_mask)))
+ return false;
+ if (memcmp(&a->ext_data, &b->ext_data, sizeof(a->ext_data)))
+ return false;
+ if (memcmp(&a->ext_mask, &b->ext_mask, sizeof(a->ext_mask)))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_vc_fdir_is_dup_fltr
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ *
+ * Check if there is a duplicate rule with the same conf value
+ *
+ * Return: true if a duplicate rule exists, false otherwise.
+ */
+static bool
+ice_vc_fdir_is_dup_fltr(struct ice_vf *vf, struct virtchnl_fdir_fltr_conf *conf)
+{
+ struct ice_fdir_fltr *desc;
+ bool ret;
+
+ list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) {
+ struct virtchnl_fdir_fltr_conf *node =
+ to_fltr_conf_from_desc(desc);
+
+ ret = ice_vc_fdir_comp_rules(node, conf);
+ if (ret)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_vc_fdir_insert_entry
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @id: pointer to ID value allocated by driver
+ *
+ * Insert a FDIR conf entry into the list and allocate an ID for this filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_insert_entry(struct ice_vf *vf,
+ struct virtchnl_fdir_fltr_conf *conf, u32 *id)
+{
+ struct ice_fdir_fltr *input = &conf->input;
+ int i;
+
+ /* alloc ID corresponding with conf */
+ i = idr_alloc(&vf->fdir.fdir_rule_idr, conf, 0,
+ ICE_FDIR_MAX_FLTRS, GFP_KERNEL);
+ if (i < 0)
+ return -EINVAL;
+ *id = i;
+
+ list_add(&input->fltr_node, &vf->fdir.fdir_rule_list);
+ return 0;
+}
+
+/**
+ * ice_vc_fdir_remove_entry - remove FDIR conf entry by ID value
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @id: filter rule's ID
+ */
+static void
+ice_vc_fdir_remove_entry(struct ice_vf *vf,
+ struct virtchnl_fdir_fltr_conf *conf, u32 id)
+{
+ struct ice_fdir_fltr *input = &conf->input;
+
+ idr_remove(&vf->fdir.fdir_rule_idr, id);
+ list_del(&input->fltr_node);
+}
+
+/**
+ * ice_vc_fdir_lookup_entry - lookup FDIR conf entry by ID value
+ * @vf: pointer to the VF info
+ * @id: filter rule's ID
+ *
+ * Return: NULL on error, pointer to the FDIR conf entry on success.
+ */
+static struct virtchnl_fdir_fltr_conf *
+ice_vc_fdir_lookup_entry(struct ice_vf *vf, u32 id)
+{
+ return idr_find(&vf->fdir.fdir_rule_idr, id);
+}
+
+/**
+ * ice_vc_fdir_flush_entry - remove all FDIR conf entries
+ * @vf: pointer to the VF info
+ */
+static void ice_vc_fdir_flush_entry(struct ice_vf *vf)
+{
+ struct virtchnl_fdir_fltr_conf *conf;
+ struct ice_fdir_fltr *desc, *temp;
+
+ list_for_each_entry_safe(desc, temp,
+ &vf->fdir.fdir_rule_list, fltr_node) {
+ conf = to_fltr_conf_from_desc(desc);
+ list_del(&desc->fltr_node);
+ devm_kfree(ice_pf_to_dev(vf->pf), conf);
+ }
+}
+
+/**
+ * ice_vc_fdir_write_fltr - write filter rule into hardware
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @add: true implies add rule, false implies delete rule
+ * @is_tun: false implies non-tunnel type filter, true implies tunnel filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int ice_vc_fdir_write_fltr(struct ice_vf *vf,
+ struct virtchnl_fdir_fltr_conf *conf,
+ bool add, bool is_tun)
+{
+ struct ice_fdir_fltr *input = &conf->input;
+ struct ice_vsi *vsi, *ctrl_vsi;
+ struct ice_fltr_desc desc;
+ struct device *dev;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ int ret;
+ u8 *pkt;
+
+ pf = vf->pf;
+ dev = ice_pf_to_dev(pf);
+ hw = &pf->hw;
+ vsi = pf->vsi[vf->lan_vsi_idx];
+ if (!vsi) {
+ dev_dbg(dev, "Invalid vsi for VF %d\n", vf->vf_id);
+ return -EINVAL;
+ }
+
+ input->dest_vsi = vsi->idx;
+ input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW;
+
+ ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
+ if (!ctrl_vsi) {
+ dev_dbg(dev, "Invalid ctrl_vsi for VF %d\n", vf->vf_id);
+ return -EINVAL;
+ }
+
+ pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+ if (!pkt)
+ return -ENOMEM;
+
+ ice_fdir_get_prgm_desc(hw, input, &desc, add);
+ ret = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun);
+ if (ret) {
+ dev_dbg(dev, "Gen training pkt for VF %d ptype %d failed\n",
+ vf->vf_id, input->flow_type);
+ goto err_free_pkt;
+ }
+
+ ret = ice_prgm_fdir_fltr(ctrl_vsi, &desc, pkt);
+ if (ret)
+ goto err_free_pkt;
+
+ return 0;
+
+err_free_pkt:
+ devm_kfree(dev, pkt);
+ return ret;
+}
+
+/**
+ * ice_vf_fdir_timer - FDIR program waiting timer interrupt handler
+ * @t: pointer to timer_list
+ */
+static void ice_vf_fdir_timer(struct timer_list *t)
+{
+ struct ice_vf_fdir_ctx *ctx_irq = from_timer(ctx_irq, t, rx_tmr);
+ struct ice_vf_fdir_ctx *ctx_done;
+ struct ice_vf_fdir *fdir;
+ unsigned long flags;
+ struct ice_vf *vf;
+ struct ice_pf *pf;
+
+ fdir = container_of(ctx_irq, struct ice_vf_fdir, ctx_irq);
+ vf = container_of(fdir, struct ice_vf, fdir);
+ ctx_done = &fdir->ctx_done;
+ pf = vf->pf;
+ spin_lock_irqsave(&fdir->ctx_lock, flags);
+ if (!(ctx_irq->flags & ICE_VF_FDIR_CTX_VALID)) {
+ spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ ctx_irq->flags &= ~ICE_VF_FDIR_CTX_VALID;
+
+ ctx_done->flags |= ICE_VF_FDIR_CTX_VALID;
+ ctx_done->conf = ctx_irq->conf;
+ ctx_done->stat = ICE_FDIR_CTX_TIMEOUT;
+ ctx_done->v_opcode = ctx_irq->v_opcode;
+ spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+
+ set_bit(ICE_FD_VF_FLUSH_CTX, pf->state);
+ ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_vc_fdir_irq_handler - ctrl_vsi Rx queue interrupt handler
+ * @ctrl_vsi: pointer to a VF's CTRL VSI
+ * @rx_desc: pointer to FDIR Rx queue descriptor
+ */
+void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi,
+ union ice_32b_rx_flex_desc *rx_desc)
+{
+ struct ice_pf *pf = ctrl_vsi->back;
+ struct ice_vf *vf = ctrl_vsi->vf;
+ struct ice_vf_fdir_ctx *ctx_done;
+ struct ice_vf_fdir_ctx *ctx_irq;
+ struct ice_vf_fdir *fdir;
+ unsigned long flags;
+ struct device *dev;
+ int ret;
+
+ if (WARN_ON(!vf))
+ return;
+
+ fdir = &vf->fdir;
+ ctx_done = &fdir->ctx_done;
+ ctx_irq = &fdir->ctx_irq;
+ dev = ice_pf_to_dev(pf);
+ spin_lock_irqsave(&fdir->ctx_lock, flags);
+ if (!(ctx_irq->flags & ICE_VF_FDIR_CTX_VALID)) {
+ spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ ctx_irq->flags &= ~ICE_VF_FDIR_CTX_VALID;
+
+ ctx_done->flags |= ICE_VF_FDIR_CTX_VALID;
+ ctx_done->conf = ctx_irq->conf;
+ ctx_done->stat = ICE_FDIR_CTX_IRQ;
+ ctx_done->v_opcode = ctx_irq->v_opcode;
+ memcpy(&ctx_done->rx_desc, rx_desc, sizeof(*rx_desc));
+ spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+
+ ret = del_timer(&ctx_irq->rx_tmr);
+ if (!ret)
+ dev_err(dev, "VF %d: Unexpected inactive timer!\n", vf->vf_id);
+
+ set_bit(ICE_FD_VF_FLUSH_CTX, pf->state);
+ ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_vf_fdir_dump_info - dump FDIR information for diagnosis
+ * @vf: pointer to the VF info
+ */
+static void ice_vf_fdir_dump_info(struct ice_vf *vf)
+{
+ struct ice_vsi *vf_vsi;
+ u32 fd_size, fd_cnt;
+ struct device *dev;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ u16 vsi_num;
+
+ pf = vf->pf;
+ hw = &pf->hw;
+ dev = ice_pf_to_dev(pf);
+ vf_vsi = ice_get_vf_vsi(vf);
+ if (!vf_vsi) {
+ dev_dbg(dev, "VF %d: invalid VSI pointer\n", vf->vf_id);
+ return;
+ }
+
+ vsi_num = ice_get_hw_vsi_num(hw, vf_vsi->idx);
+
+ fd_size = rd32(hw, VSIQF_FD_SIZE(vsi_num));
+ fd_cnt = rd32(hw, VSIQF_FD_CNT(vsi_num));
+ dev_dbg(dev, "VF %d: space allocated: guar:0x%x, be:0x%x, space consumed: guar:0x%x, be:0x%x\n",
+ vf->vf_id,
+ (fd_size & VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S,
+ (fd_size & VSIQF_FD_CNT_FD_BCNT_M) >> VSIQF_FD_CNT_FD_BCNT_S,
+ (fd_cnt & VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S,
+ (fd_cnt & VSIQF_FD_CNT_FD_BCNT_M) >> VSIQF_FD_CNT_FD_BCNT_S);
+}
+
+/**
+ * ice_vf_verify_rx_desc - verify received FDIR programming status descriptor
+ * @vf: pointer to the VF info
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vf_verify_rx_desc(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+ enum virtchnl_fdir_prgm_status *status)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ u32 stat_err, error, prog_id;
+ int ret;
+
+ stat_err = le16_to_cpu(ctx->rx_desc.wb.status_error0);
+ if (((stat_err & ICE_FXD_FLTR_WB_QW1_DD_M) >>
+ ICE_FXD_FLTR_WB_QW1_DD_S) != ICE_FXD_FLTR_WB_QW1_DD_YES) {
+ *status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ dev_err(dev, "VF %d: Desc Done not set\n", vf->vf_id);
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ prog_id = (stat_err & ICE_FXD_FLTR_WB_QW1_PROG_ID_M) >>
+ ICE_FXD_FLTR_WB_QW1_PROG_ID_S;
+ if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD &&
+ ctx->v_opcode != VIRTCHNL_OP_ADD_FDIR_FILTER) {
+ dev_err(dev, "VF %d: Desc show add, but ctx not",
+ vf->vf_id);
+ *status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_DEL &&
+ ctx->v_opcode != VIRTCHNL_OP_DEL_FDIR_FILTER) {
+ dev_err(dev, "VF %d: Desc show del, but ctx not",
+ vf->vf_id);
+ *status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_M) >>
+ ICE_FXD_FLTR_WB_QW1_FAIL_S;
+ if (error == ICE_FXD_FLTR_WB_QW1_FAIL_YES) {
+ if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD) {
+ dev_err(dev, "VF %d, Failed to add FDIR rule due to no space in the table",
+ vf->vf_id);
+ *status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ } else {
+ dev_err(dev, "VF %d, Failed to remove FDIR rule, attempt to remove non-existent entry",
+ vf->vf_id);
+ *status = VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST;
+ }
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M) >>
+ ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S;
+ if (error == ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES) {
+ dev_err(dev, "VF %d: Profile matching error", vf->vf_id);
+ *status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ *status = VIRTCHNL_FDIR_SUCCESS;
+
+ return 0;
+
+err_exit:
+ ice_vf_fdir_dump_info(vf);
+ return ret;
+}
+
+/**
+ * ice_vc_add_fdir_fltr_post
+ * @vf: pointer to the VF structure
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ * @success: true implies success, false implies failure
+ *
+ * Post process for the flow director add command. On success, do the post
+ * processing and send a success message back over virtchnl. Otherwise, revert
+ * the context and send a failure message back over virtchnl.
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_add_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+ enum virtchnl_fdir_prgm_status status,
+ bool success)
+{
+ struct virtchnl_fdir_fltr_conf *conf = ctx->conf;
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ enum virtchnl_status_code v_ret;
+ struct virtchnl_fdir_add *resp;
+ int ret, len, is_tun;
+
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ len = sizeof(*resp);
+ resp = kzalloc(len, GFP_KERNEL);
+ if (!resp) {
+ len = 0;
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ dev_dbg(dev, "VF %d: Alloc resp buf fail", vf->vf_id);
+ goto err_exit;
+ }
+
+ if (!success)
+ goto err_exit;
+
+ is_tun = 0;
+ resp->status = status;
+ resp->flow_id = conf->flow_id;
+ vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]++;
+
+ ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+ (u8 *)resp, len);
+ kfree(resp);
+
+ dev_dbg(dev, "VF %d: flow_id:0x%X, FDIR %s success!\n",
+ vf->vf_id, conf->flow_id,
+ (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER) ?
+ "add" : "del");
+ return ret;
+
+err_exit:
+ if (resp)
+ resp->status = status;
+ ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+ devm_kfree(dev, conf);
+
+ ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+ (u8 *)resp, len);
+ kfree(resp);
+ return ret;
+}
+
+/**
+ * ice_vc_del_fdir_fltr_post
+ * @vf: pointer to the VF structure
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ * @success: true implies success, false implies failure
+ *
+ * Post process for the flow director del command. On success, do the post
+ * processing and send a success message back over virtchnl. Otherwise, revert
+ * the context and send a failure message back over virtchnl.
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_del_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+ enum virtchnl_fdir_prgm_status status,
+ bool success)
+{
+ struct virtchnl_fdir_fltr_conf *conf = ctx->conf;
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ enum virtchnl_status_code v_ret;
+ struct virtchnl_fdir_del *resp;
+ int ret, len, is_tun;
+
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ len = sizeof(*resp);
+ resp = kzalloc(len, GFP_KERNEL);
+ if (!resp) {
+ len = 0;
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ dev_dbg(dev, "VF %d: Alloc resp buf fail", vf->vf_id);
+ goto err_exit;
+ }
+
+ if (!success)
+ goto err_exit;
+
+ is_tun = 0;
+ resp->status = status;
+ ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+ vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]--;
+
+ ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+ (u8 *)resp, len);
+ kfree(resp);
+
+ dev_dbg(dev, "VF %d: flow_id:0x%X, FDIR %s success!\n",
+ vf->vf_id, conf->flow_id,
+ (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER) ?
+ "add" : "del");
+ devm_kfree(dev, conf);
+ return ret;
+
+err_exit:
+ if (resp)
+ resp->status = status;
+ if (success)
+ devm_kfree(dev, conf);
+
+ ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+ (u8 *)resp, len);
+ kfree(resp);
+ return ret;
+}
+
+/**
+ * ice_flush_fdir_ctx
+ * @pf: pointer to the PF structure
+ *
+ * Flush all pending events on the ctx_done list and process them.
+ */
+void ice_flush_fdir_ctx(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ if (!test_and_clear_bit(ICE_FD_VF_FLUSH_CTX, pf->state))
+ return;
+
+ mutex_lock(&pf->vfs.table_lock);
+ ice_for_each_vf(pf, bkt, vf) {
+ struct device *dev = ice_pf_to_dev(pf);
+ enum virtchnl_fdir_prgm_status status;
+ struct ice_vf_fdir_ctx *ctx;
+ unsigned long flags;
+ int ret;
+
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+ continue;
+
+ if (vf->ctrl_vsi_idx == ICE_NO_VSI)
+ continue;
+
+ ctx = &vf->fdir.ctx_done;
+ spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+ if (!(ctx->flags & ICE_VF_FDIR_CTX_VALID)) {
+ spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+ continue;
+ }
+ spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+
+ WARN_ON(ctx->stat == ICE_FDIR_CTX_READY);
+ if (ctx->stat == ICE_FDIR_CTX_TIMEOUT) {
+ status = VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT;
+ dev_err(dev, "VF %d: ctrl_vsi irq timeout\n",
+ vf->vf_id);
+ goto err_exit;
+ }
+
+ ret = ice_vf_verify_rx_desc(vf, ctx, &status);
+ if (ret)
+ goto err_exit;
+
+ if (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER)
+ ice_vc_add_fdir_fltr_post(vf, ctx, status, true);
+ else if (ctx->v_opcode == VIRTCHNL_OP_DEL_FDIR_FILTER)
+ ice_vc_del_fdir_fltr_post(vf, ctx, status, true);
+ else
+ dev_err(dev, "VF %d: Unsupported opcode\n", vf->vf_id);
+
+ spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+ ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+ spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+ continue;
+err_exit:
+ if (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER)
+ ice_vc_add_fdir_fltr_post(vf, ctx, status, false);
+ else if (ctx->v_opcode == VIRTCHNL_OP_DEL_FDIR_FILTER)
+ ice_vc_del_fdir_fltr_post(vf, ctx, status, false);
+ else
+ dev_err(dev, "VF %d: Unsupported opcode\n", vf->vf_id);
+
+ spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+ ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+ spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+ }
+ mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_vc_fdir_set_irq_ctx - set FDIR context info for later IRQ handler
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ * @v_opcode: virtual channel operation code
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_irq_ctx(struct ice_vf *vf, struct virtchnl_fdir_fltr_conf *conf,
+ enum virtchnl_ops v_opcode)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ struct ice_vf_fdir_ctx *ctx;
+ unsigned long flags;
+
+ ctx = &vf->fdir.ctx_irq;
+ spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+ if ((vf->fdir.ctx_irq.flags & ICE_VF_FDIR_CTX_VALID) ||
+ (vf->fdir.ctx_done.flags & ICE_VF_FDIR_CTX_VALID)) {
+ spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+ dev_dbg(dev, "VF %d: Last request is still in progress\n",
+ vf->vf_id);
+ return -EBUSY;
+ }
+ ctx->flags |= ICE_VF_FDIR_CTX_VALID;
+ spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+
+ ctx->conf = conf;
+ ctx->v_opcode = v_opcode;
+ ctx->stat = ICE_FDIR_CTX_READY;
+ timer_setup(&ctx->rx_tmr, ice_vf_fdir_timer, 0);
+
+ mod_timer(&ctx->rx_tmr, round_jiffies(msecs_to_jiffies(10) + jiffies));
+
+ return 0;
+}
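+
+/* Illustrative request lifecycle (sketch, not driver code): a caller first
+ * arms ctx_irq and a 10 ms timer, then programs the filter; either the ctrl
+ * VSI IRQ handler or the timer expiry moves the context to ctx_done, and the
+ * service task flushes it:
+ *
+ *	ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_ADD_FDIR_FILTER);
+ *	ice_vc_fdir_write_fltr(vf, conf, true, false);
+ *	// then ice_vc_fdir_irq_handler() or ice_vf_fdir_timer()
+ *	// then ice_flush_fdir_ctx() -> ice_vc_add_fdir_fltr_post()
+ */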
+
+/**
+ * ice_vc_fdir_clear_irq_ctx - clear FDIR context info for IRQ handler
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_fdir_clear_irq_ctx(struct ice_vf *vf)
+{
+ struct ice_vf_fdir_ctx *ctx = &vf->fdir.ctx_irq;
+ unsigned long flags;
+
+ del_timer(&ctx->rx_tmr);
+ spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+ ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+ spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+}
+
+/**
+ * ice_vc_add_fdir_fltr - add a FDIR filter for VF by the msg buffer
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_fdir_add *fltr = (struct virtchnl_fdir_add *)msg;
+ struct virtchnl_fdir_add *stat = NULL;
+ struct virtchnl_fdir_fltr_conf *conf;
+ enum virtchnl_status_code v_ret;
+ struct device *dev;
+ struct ice_pf *pf;
+ int is_tun = 0;
+ int len = 0;
+ int ret;
+
+ pf = vf->pf;
+ dev = ice_pf_to_dev(pf);
+ ret = ice_vc_fdir_param_check(vf, fltr->vsi_id);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_dbg(dev, "Parameter check for VF %d failed\n", vf->vf_id);
+ goto err_exit;
+ }
+
+ ret = ice_vf_start_ctrl_vsi(vf);
+ if (ret && (ret != -EEXIST)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_err(dev, "Init FDIR for VF %d failed, ret:%d\n",
+ vf->vf_id, ret);
+ goto err_exit;
+ }
+
+ stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+ if (!stat) {
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ dev_dbg(dev, "Alloc stat for VF %d failed\n", vf->vf_id);
+ goto err_exit;
+ }
+
+ conf = devm_kzalloc(dev, sizeof(*conf), GFP_KERNEL);
+ if (!conf) {
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ dev_dbg(dev, "Alloc conf for VF %d failed\n", vf->vf_id);
+ goto err_exit;
+ }
+
+ len = sizeof(*stat);
+ ret = ice_vc_validate_fdir_fltr(vf, fltr, conf);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+ dev_dbg(dev, "Invalid FDIR filter from VF %d\n", vf->vf_id);
+ goto err_free_conf;
+ }
+
+ if (fltr->validate_only) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_SUCCESS;
+ devm_kfree(dev, conf);
+ ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER,
+ v_ret, (u8 *)stat, len);
+ goto exit;
+ }
+
+ ret = ice_vc_fdir_config_input_set(vf, fltr, conf, is_tun);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT;
+ dev_err(dev, "VF %d: FDIR input set configure failed, ret:%d\n",
+ vf->vf_id, ret);
+ goto err_free_conf;
+ }
+
+ ret = ice_vc_fdir_is_dup_fltr(vf, conf);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_EXIST;
+ dev_dbg(dev, "VF %d: duplicated FDIR rule detected\n",
+ vf->vf_id);
+ goto err_free_conf;
+ }
+
+ ret = ice_vc_fdir_insert_entry(vf, conf, &conf->flow_id);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ dev_dbg(dev, "VF %d: insert FDIR list failed\n", vf->vf_id);
+ goto err_free_conf;
+ }
+
+ ret = ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_ADD_FDIR_FILTER);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
+ goto err_rem_entry;
+ }
+
+ ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
+ vf->vf_id, ret);
+ goto err_clr_irq;
+ }
+
+exit:
+ kfree(stat);
+ return ret;
+
+err_clr_irq:
+ ice_vc_fdir_clear_irq_ctx(vf);
+err_rem_entry:
+ ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+err_free_conf:
+ devm_kfree(dev, conf);
+err_exit:
+ ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER, v_ret,
+ (u8 *)stat, len);
+ kfree(stat);
+ return ret;
+}
+
+/**
+ * ice_vc_del_fdir_fltr - delete a FDIR filter for VF by the msg buffer
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg)
+{
+ struct virtchnl_fdir_del *fltr = (struct virtchnl_fdir_del *)msg;
+ struct virtchnl_fdir_del *stat = NULL;
+ struct virtchnl_fdir_fltr_conf *conf;
+ enum virtchnl_status_code v_ret;
+ struct device *dev;
+ struct ice_pf *pf;
+ int is_tun = 0;
+ int len = 0;
+ int ret;
+
+ pf = vf->pf;
+ dev = ice_pf_to_dev(pf);
+ ret = ice_vc_fdir_param_check(vf, fltr->vsi_id);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_dbg(dev, "Parameter check for VF %d failed\n", vf->vf_id);
+ goto err_exit;
+ }
+
+ stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+ if (!stat) {
+ v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+ dev_dbg(dev, "Alloc stat for VF %d failed\n", vf->vf_id);
+ goto err_exit;
+ }
+
+ len = sizeof(*stat);
+
+ conf = ice_vc_fdir_lookup_entry(vf, fltr->flow_id);
+ if (!conf) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST;
+ dev_dbg(dev, "VF %d: FDIR invalid flow_id:0x%X\n",
+ vf->vf_id, fltr->flow_id);
+ goto err_exit;
+ }
+
+ /* Just return failure when ctrl_vsi idx is invalid */
+ if (vf->ctrl_vsi_idx == ICE_NO_VSI) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ dev_err(dev, "Invalid FDIR ctrl_vsi for VF %d\n", vf->vf_id);
+ goto err_exit;
+ }
+
+ ret = ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_DEL_FDIR_FILTER);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
+ goto err_exit;
+ }
+
+ ret = ice_vc_fdir_write_fltr(vf, conf, false, is_tun);
+ if (ret) {
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
+ vf->vf_id, ret);
+ goto err_del_tmr;
+ }
+
+ kfree(stat);
+
+ return ret;
+
+err_del_tmr:
+ ice_vc_fdir_clear_irq_ctx(vf);
+err_exit:
+ ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_FDIR_FILTER, v_ret,
+ (u8 *)stat, len);
+ kfree(stat);
+ return ret;
+}
+
+/**
+ * ice_vf_fdir_init - init FDIR resource for VF
+ * @vf: pointer to the VF info
+ */
+void ice_vf_fdir_init(struct ice_vf *vf)
+{
+ struct ice_vf_fdir *fdir = &vf->fdir;
+
+ idr_init(&fdir->fdir_rule_idr);
+ INIT_LIST_HEAD(&fdir->fdir_rule_list);
+
+ spin_lock_init(&fdir->ctx_lock);
+ fdir->ctx_irq.flags = 0;
+ fdir->ctx_done.flags = 0;
+ ice_vc_fdir_reset_cnt_all(fdir);
+}
+
+/**
+ * ice_vf_fdir_exit - destroy FDIR resource for VF
+ * @vf: pointer to the VF info
+ */
+void ice_vf_fdir_exit(struct ice_vf *vf)
+{
+ ice_vc_fdir_flush_entry(vf);
+ idr_destroy(&vf->fdir.fdir_rule_idr);
+ ice_vc_fdir_rem_prof_all(vf);
+ ice_vc_fdir_free_prof_all(vf);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
new file mode 100644
index 000000000..c5bcc8d74
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_FDIR_H_
+#define _ICE_VIRTCHNL_FDIR_H_
+
+struct ice_vf;
+struct ice_pf;
+struct ice_vsi;
+
+enum ice_fdir_ctx_stat {
+ ICE_FDIR_CTX_READY,
+ ICE_FDIR_CTX_IRQ,
+ ICE_FDIR_CTX_TIMEOUT,
+};
+
+struct ice_vf_fdir_ctx {
+ struct timer_list rx_tmr;
+ enum virtchnl_ops v_opcode;
+ enum ice_fdir_ctx_stat stat;
+ union ice_32b_rx_flex_desc rx_desc;
+#define ICE_VF_FDIR_CTX_VALID BIT(0)
+ u32 flags;
+
+ void *conf;
+};
+
+/* VF FDIR information structure */
+struct ice_vf_fdir {
+ u16 fdir_fltr_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX];
+ int prof_entry_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX];
+ struct ice_fd_hw_prof **fdir_prof;
+
+ struct idr fdir_rule_idr;
+ struct list_head fdir_rule_list;
+
+ spinlock_t ctx_lock; /* protects FDIR context info */
+ struct ice_vf_fdir_ctx ctx_irq;
+ struct ice_vf_fdir_ctx ctx_done;
+};
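+
+/* Note (illustrative): ctx_irq tracks the request currently in flight, armed
+ * by ice_vc_fdir_set_irq_ctx(), while ctx_done holds the completed context
+ * that ice_flush_fdir_ctx() consumes; ctx_lock protects the handoff between
+ * the two.
+ */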
+
+#ifdef CONFIG_PCI_IOV
+int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg);
+int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg);
+void ice_vf_fdir_init(struct ice_vf *vf);
+void ice_vf_fdir_exit(struct ice_vf *vf);
+void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi,
+ union ice_32b_rx_flex_desc *rx_desc);
+void ice_flush_fdir_ctx(struct ice_pf *pf);
+#else
+static inline void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, union ice_32b_rx_flex_desc *rx_desc) { }
+static inline void ice_flush_fdir_ctx(struct ice_pf *pf) { }
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_VIRTCHNL_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vlan.h b/drivers/net/ethernet/intel/ice/ice_vlan.h
new file mode 100644
index 000000000..bc4550a03
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vlan.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VLAN_H_
+#define _ICE_VLAN_H_
+
+#include <linux/types.h>
+#include "ice_type.h"
+
+struct ice_vlan {
+ u16 tpid;
+ u16 vid;
+ u8 prio;
+};
+
+#define ICE_VLAN(tpid, vid, prio) ((struct ice_vlan){ tpid, vid, prio })
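+
+/* Example usage (illustrative): build an 802.1Q VLAN 100, priority 0, and
+ * hand it to one of the VSI VLAN helpers:
+ *
+ *	struct ice_vlan vlan = ICE_VLAN(ETH_P_8021Q, 100, 0);
+ *	int err = ice_vsi_add_vlan(vsi, &vlan);
+ */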
+
+#endif /* _ICE_VLAN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vlan_mode.c b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c
new file mode 100644
index 000000000..bcda2e004
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_common.h"
+
+/**
+ * ice_pkg_get_supported_vlan_mode - determine if DDP supports Double VLAN mode
+ * @hw: pointer to the HW struct
+ * @dvm: output variable to determine if DDP supports DVM (true) or SVM (false)
+ */
+static int
+ice_pkg_get_supported_vlan_mode(struct ice_hw *hw, bool *dvm)
+{
+ u16 meta_init_size = sizeof(struct ice_meta_init_section);
+ struct ice_meta_init_section *sect;
+ struct ice_buf_build *bld;
+ int status;
+
+ /* if anything fails, we assume there is no DVM support */
+ *dvm = false;
+
+ bld = ice_pkg_buf_alloc_single_section(hw,
+ ICE_SID_RXPARSER_METADATA_INIT,
+ meta_init_size, (void **)&sect);
+ if (!bld)
+ return -ENOMEM;
+
+ /* only need to read a single section */
+ sect->count = cpu_to_le16(1);
+ sect->offset = cpu_to_le16(ICE_META_VLAN_MODE_ENTRY);
+
+ status = ice_aq_upload_section(hw,
+ (struct ice_buf_hdr *)ice_pkg_buf(bld),
+ ICE_PKG_BUF_SIZE, NULL);
+ if (!status) {
+ DECLARE_BITMAP(entry, ICE_META_INIT_BITS);
+ u32 arr[ICE_META_INIT_DW_CNT];
+ u16 i;
+
+ /* convert to host bitmap format */
+ for (i = 0; i < ICE_META_INIT_DW_CNT; i++)
+ arr[i] = le32_to_cpu(sect->entry.bm[i]);
+
+ bitmap_from_arr32(entry, arr, (u16)ICE_META_INIT_BITS);
+
+ /* check if DVM is supported */
+ *dvm = test_bit(ICE_META_VLAN_MODE_BIT, entry);
+ }
+
+ ice_pkg_buf_free(hw, bld);
+
+ return status;
+}
+
+/**
+ * ice_aq_get_vlan_mode - get the VLAN mode of the device
+ * @hw: pointer to the HW structure
+ * @get_params: structure FW fills in based on the current VLAN mode config
+ *
+ * Get VLAN Mode Parameters (0x020D)
+ */
+static int
+ice_aq_get_vlan_mode(struct ice_hw *hw,
+ struct ice_aqc_get_vlan_mode *get_params)
+{
+ struct ice_aq_desc desc;
+
+ if (!get_params)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc,
+ ice_aqc_opc_get_vlan_mode_parameters);
+
+ return ice_aq_send_cmd(hw, &desc, get_params, sizeof(*get_params),
+ NULL);
+}
+
+/**
+ * ice_aq_is_dvm_ena - query FW to check if double VLAN mode is enabled
+ * @hw: pointer to the HW structure
+ *
+ * Returns true if the hardware/firmware is configured in double VLAN mode;
+ * otherwise returns false, signaling that the hardware/firmware is configured
+ * in single VLAN mode.
+ *
+ * Also, return false if this call fails for any reason (i.e. firmware doesn't
+ * support this AQ call).
+ */
+static bool ice_aq_is_dvm_ena(struct ice_hw *hw)
+{
+ struct ice_aqc_get_vlan_mode get_params = { 0 };
+ int status;
+
+ status = ice_aq_get_vlan_mode(hw, &get_params);
+ if (status) {
+ ice_debug(hw, ICE_DBG_AQ, "Failed to get VLAN mode, status %d\n",
+ status);
+ return false;
+ }
+
+ return (get_params.vlan_mode & ICE_AQ_VLAN_MODE_DVM_ENA);
+}
+
+/**
+ * ice_is_dvm_ena - check if double VLAN mode is enabled
+ * @hw: pointer to the HW structure
+ *
+ * The device is configured in single or double VLAN mode on initialization and
+ * this cannot be dynamically changed during runtime. Based on this there is no
+ * need to make an AQ call every time the driver needs to know the VLAN mode.
+ * Instead, use the cached VLAN mode.
+ */
+bool ice_is_dvm_ena(struct ice_hw *hw)
+{
+ return hw->dvm_ena;
+}
+
+/**
+ * ice_cache_vlan_mode - cache VLAN mode after DDP is downloaded
+ * @hw: pointer to the HW structure
+ *
+ * This is only called after downloading the DDP and after the global
+ * configuration lock has been released because all ports on a device need to
+ * cache the VLAN mode.
+ */
+static void ice_cache_vlan_mode(struct ice_hw *hw)
+{
+	hw->dvm_ena = ice_aq_is_dvm_ena(hw);
+}
+
+/**
+ * ice_pkg_supports_dvm - find out if DDP supports DVM
+ * @hw: pointer to the HW structure
+ */
+static bool ice_pkg_supports_dvm(struct ice_hw *hw)
+{
+ bool pkg_supports_dvm;
+ int status;
+
+ status = ice_pkg_get_supported_vlan_mode(hw, &pkg_supports_dvm);
+ if (status) {
+ ice_debug(hw, ICE_DBG_PKG, "Failed to get supported VLAN mode, status %d\n",
+ status);
+ return false;
+ }
+
+ return pkg_supports_dvm;
+}
+
+/**
+ * ice_fw_supports_dvm - find out if FW supports DVM
+ * @hw: pointer to the HW structure
+ */
+static bool ice_fw_supports_dvm(struct ice_hw *hw)
+{
+ struct ice_aqc_get_vlan_mode get_vlan_mode = { 0 };
+ int status;
+
+ /* If firmware returns success, then it supports DVM, else it only
+ * supports SVM
+ */
+ status = ice_aq_get_vlan_mode(hw, &get_vlan_mode);
+ if (status) {
+ ice_debug(hw, ICE_DBG_NVM, "Failed to get VLAN mode, status %d\n",
+ status);
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * ice_is_dvm_supported - check if Double VLAN Mode is supported
+ * @hw: pointer to the hardware structure
+ *
+ * Returns true if Double VLAN Mode (DVM) is supported and false if only Single
+ * VLAN Mode (SVM) is supported. In order for DVM to be supported the DDP and
+ * firmware must support it, otherwise only SVM is supported. This function
+ * should only be called while the global config lock is held and after the
+ * package has been successfully downloaded.
+ */
+static bool ice_is_dvm_supported(struct ice_hw *hw)
+{
+ if (!ice_pkg_supports_dvm(hw)) {
+ ice_debug(hw, ICE_DBG_PKG, "DDP doesn't support DVM\n");
+ return false;
+ }
+
+ if (!ice_fw_supports_dvm(hw)) {
+ ice_debug(hw, ICE_DBG_PKG, "FW doesn't support DVM\n");
+ return false;
+ }
+
+ return true;
+}
+
+#define ICE_EXTERNAL_VLAN_ID_FV_IDX 11
+#define ICE_SW_LKUP_VLAN_LOC_LKUP_IDX 1
+#define ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX 2
+#define ICE_SW_LKUP_PROMISC_VLAN_LOC_LKUP_IDX 2
+#define ICE_PKT_FLAGS_0_TO_15_FV_IDX 1
+static struct ice_update_recipe_lkup_idx_params ice_dvm_dflt_recipes[] = {
+ {
+ /* Update recipe ICE_SW_LKUP_VLAN to filter based on the
+ * outer/single VLAN in DVM
+ */
+ .rid = ICE_SW_LKUP_VLAN,
+ .fv_idx = ICE_EXTERNAL_VLAN_ID_FV_IDX,
+ .ignore_valid = true,
+ .mask = 0,
+ .mask_valid = false, /* use pre-existing mask */
+ .lkup_idx = ICE_SW_LKUP_VLAN_LOC_LKUP_IDX,
+ },
+ {
+ /* Update recipe ICE_SW_LKUP_VLAN to filter based on the VLAN
+ * packet flags to support VLAN filtering on multiple VLAN
+ * ethertypes (i.e. 0x8100 and 0x88a8) in DVM
+ */
+ .rid = ICE_SW_LKUP_VLAN,
+ .fv_idx = ICE_PKT_FLAGS_0_TO_15_FV_IDX,
+ .ignore_valid = false,
+ .mask = ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK,
+ .mask_valid = true,
+ .lkup_idx = ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX,
+ },
+ {
+ /* Update recipe ICE_SW_LKUP_PROMISC_VLAN to filter based on the
+ * outer/single VLAN in DVM
+ */
+ .rid = ICE_SW_LKUP_PROMISC_VLAN,
+ .fv_idx = ICE_EXTERNAL_VLAN_ID_FV_IDX,
+ .ignore_valid = true,
+ .mask = 0,
+ .mask_valid = false, /* use pre-existing mask */
+ .lkup_idx = ICE_SW_LKUP_PROMISC_VLAN_LOC_LKUP_IDX,
+ },
+};
+
+/**
+ * ice_dvm_update_dflt_recipes - update default switch recipes in DVM
+ * @hw: hardware structure used to update the recipes
+ */
+static int ice_dvm_update_dflt_recipes(struct ice_hw *hw)
+{
+ unsigned long i;
+
+ for (i = 0; i < ARRAY_SIZE(ice_dvm_dflt_recipes); i++) {
+ struct ice_update_recipe_lkup_idx_params *params;
+ int status;
+
+ params = &ice_dvm_dflt_recipes[i];
+
+ status = ice_update_recipe_lkup_idx(hw, params);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to update RID %d lkup_idx %d fv_idx %d mask_valid %s mask 0x%04x\n",
+ params->rid, params->lkup_idx, params->fv_idx,
+ params->mask_valid ? "true" : "false",
+ params->mask);
+ return status;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_aq_set_vlan_mode - set the VLAN mode of the device
+ * @hw: pointer to the HW structure
+ * @set_params: requested VLAN mode configuration
+ *
+ * Set VLAN Mode Parameters (0x020C)
+ */
+static int
+ice_aq_set_vlan_mode(struct ice_hw *hw,
+ struct ice_aqc_set_vlan_mode *set_params)
+{
+ u8 rdma_packet, mng_vlan_prot_id;
+ struct ice_aq_desc desc;
+
+ if (!set_params)
+ return -EINVAL;
+
+ if (set_params->l2tag_prio_tagging > ICE_AQ_VLAN_PRIO_TAG_MAX)
+ return -EINVAL;
+
+ rdma_packet = set_params->rdma_packet;
+ if (rdma_packet != ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING &&
+ rdma_packet != ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING)
+ return -EINVAL;
+
+ mng_vlan_prot_id = set_params->mng_vlan_prot_id;
+ if (mng_vlan_prot_id != ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER &&
+ mng_vlan_prot_id != ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER)
+ return -EINVAL;
+
+ ice_fill_dflt_direct_cmd_desc(&desc,
+ ice_aqc_opc_set_vlan_mode_parameters);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ return ice_aq_send_cmd(hw, &desc, set_params, sizeof(*set_params),
+ NULL);
+}
+
+/**
+ * ice_set_dvm - sets up software and hardware for double VLAN mode
+ * @hw: pointer to the hardware structure
+ */
+static int ice_set_dvm(struct ice_hw *hw)
+{
+ struct ice_aqc_set_vlan_mode params = { 0 };
+ int status;
+
+ params.l2tag_prio_tagging = ICE_AQ_VLAN_PRIO_TAG_OUTER_CTAG;
+ params.rdma_packet = ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING;
+ params.mng_vlan_prot_id = ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER;
+
+ status = ice_aq_set_vlan_mode(hw, &params);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to set double VLAN mode parameters, status %d\n",
+ status);
+ return status;
+ }
+
+ status = ice_dvm_update_dflt_recipes(hw);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to update default recipes for double VLAN mode, status %d\n",
+ status);
+ return status;
+ }
+
+ status = ice_aq_set_port_params(hw->port_info, true, NULL);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to set port in double VLAN mode, status %d\n",
+ status);
+ return status;
+ }
+
+ status = ice_set_dvm_boost_entries(hw);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to set boost TCAM entries for double VLAN mode, status %d\n",
+ status);
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_set_svm - set single VLAN mode
+ * @hw: pointer to the HW structure
+ */
+static int ice_set_svm(struct ice_hw *hw)
+{
+ struct ice_aqc_set_vlan_mode *set_params;
+ int status;
+
+ status = ice_aq_set_port_params(hw->port_info, false, NULL);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to set port parameters for single VLAN mode\n");
+ return status;
+ }
+
+ set_params = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*set_params),
+ GFP_KERNEL);
+ if (!set_params)
+ return -ENOMEM;
+
+	/* default configuration for SVM */
+ set_params->l2tag_prio_tagging = ICE_AQ_VLAN_PRIO_TAG_INNER_CTAG;
+ set_params->rdma_packet = ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING;
+ set_params->mng_vlan_prot_id = ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER;
+
+ status = ice_aq_set_vlan_mode(hw, set_params);
+ if (status)
+ ice_debug(hw, ICE_DBG_INIT, "Failed to configure port in single VLAN mode\n");
+
+ devm_kfree(ice_hw_to_dev(hw), set_params);
+ return status;
+}
+
+/**
+ * ice_set_vlan_mode - configure the VLAN mode of the device
+ * @hw: pointer to the HW structure
+ */
+int ice_set_vlan_mode(struct ice_hw *hw)
+{
+ if (!ice_is_dvm_supported(hw))
+ return 0;
+
+ if (!ice_set_dvm(hw))
+ return 0;
+
+ return ice_set_svm(hw);
+}
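+
+/* Illustrative caller view (sketch): ice_set_vlan_mode() is a no-op when DVM
+ * is unsupported, prefers DVM otherwise, and falls back to SVM only when the
+ * DVM setup fails:
+ *
+ *	if (ice_set_vlan_mode(hw))
+ *		ice_debug(hw, ICE_DBG_INIT, "Unable to set VLAN mode\n");
+ */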
+
+/**
+ * ice_print_dvm_not_supported - print if DDP and/or FW doesn't support DVM
+ * @hw: pointer to the HW structure
+ *
+ * The purpose of this function is to print that QinQ is not supported due to
+ * an incompatible DDP and/or FW. This will give a hint to the user to
+ * update one and/or both components if they expect QinQ functionality.
+ */
+static void ice_print_dvm_not_supported(struct ice_hw *hw)
+{
+ bool pkg_supports_dvm = ice_pkg_supports_dvm(hw);
+ bool fw_supports_dvm = ice_fw_supports_dvm(hw);
+
+ if (!fw_supports_dvm && !pkg_supports_dvm)
+ dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your DDP package and NVM to versions that support QinQ.\n");
+ else if (!pkg_supports_dvm)
+ dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your DDP package to a version that supports QinQ.\n");
+ else if (!fw_supports_dvm)
+ dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your NVM to a version that supports QinQ.\n");
+}
+
+/**
+ * ice_post_pkg_dwnld_vlan_mode_cfg - configure VLAN mode after DDP download
+ * @hw: pointer to the HW structure
+ *
+ * This function is meant to configure any VLAN mode specific functionality
+ * after the global configuration lock has been released and the DDP has been
+ * downloaded.
+ *
+ * Since only one PF downloads the DDP and configures the VLAN mode there needs
+ * to be a way to configure the other PFs after the DDP has been downloaded and
+ * the global configuration lock has been released. All such code should go in
+ * this function.
+ */
+void ice_post_pkg_dwnld_vlan_mode_cfg(struct ice_hw *hw)
+{
+ ice_cache_vlan_mode(hw);
+
+ if (ice_is_dvm_ena(hw))
+ ice_change_proto_id_to_dvm();
+ else
+ ice_print_dvm_not_supported(hw);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vlan_mode.h b/drivers/net/ethernet/intel/ice/ice_vlan_mode.h
new file mode 100644
index 000000000..a0fb743d0
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vlan_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VLAN_MODE_H_
+#define _ICE_VLAN_MODE_H_
+
+struct ice_hw;
+
+bool ice_is_dvm_ena(struct ice_hw *hw);
+int ice_set_vlan_mode(struct ice_hw *hw);
+void ice_post_pkg_dwnld_vlan_mode_cfg(struct ice_hw *hw);
+
+#endif /* _ICE_VLAN_MODE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
new file mode 100644
index 000000000..5b4a0abb4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
@@ -0,0 +1,707 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_vsi_vlan_lib.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice.h"
+
+static void print_invalid_tpid(struct ice_vsi *vsi, u16 tpid)
+{
+ dev_err(ice_pf_to_dev(vsi->back), "%s %d specified invalid VLAN tpid 0x%04x\n",
+ ice_vsi_type_str(vsi->type), vsi->idx, tpid);
+}
+
+/**
+ * validate_vlan - check if the ice_vlan passed in is valid
+ * @vsi: VSI used for printing error message
+ * @vlan: ice_vlan structure to validate
+ *
+ * Return true if the VLAN TPID is valid, or if both the VLAN TPID and the
+ * VLAN VID are 0. This allows non-zero VLAN filters with the specified TPID
+ * and untagged VLAN 0 filters to be added to the prune list, respectively.
+ */
+static bool validate_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ if (vlan->tpid != ETH_P_8021Q && vlan->tpid != ETH_P_8021AD &&
+ vlan->tpid != ETH_P_QINQ1 && (vlan->tpid || vlan->vid)) {
+ print_invalid_tpid(vsi, vlan->tpid);
+ return false;
+ }
+
+ return true;
+}
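+
+/* Examples (illustrative): ICE_VLAN(ETH_P_8021Q, 100, 0) and the untagged
+ * filter ICE_VLAN(0, 0, 0) both pass validation, while an unknown TPID such
+ * as ICE_VLAN(0x1234, 100, 0) is rejected via print_invalid_tpid().
+ */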
+
+/**
+ * ice_vsi_add_vlan - default add VLAN implementation for all VSI types
+ * @vsi: VSI being configured
+ * @vlan: VLAN filter to add
+ */
+int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ int err;
+
+ if (!validate_vlan(vsi, vlan))
+ return -EINVAL;
+
+ err = ice_fltr_add_vlan(vsi, vlan);
+ if (err && err != -EEXIST) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failure Adding VLAN %d on VSI %i, status %d\n",
+ vlan->vid, vsi->vsi_num, err);
+ return err;
+ }
+
+ vsi->num_vlan++;
+ return 0;
+}
+
+/**
+ * ice_vsi_del_vlan - default del VLAN implementation for all VSI types
+ * @vsi: VSI being configured
+ * @vlan: VLAN filter to delete
+ */
+int ice_vsi_del_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ int err;
+
+ if (!validate_vlan(vsi, vlan))
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(pf);
+
+ err = ice_fltr_remove_vlan(vsi, vlan);
+ if (!err)
+ vsi->num_vlan--;
+ else if (err == -ENOENT || err == -EBUSY)
+ err = 0;
+ else
+ dev_err(dev, "Error removing VLAN %d on VSI %i error: %d\n",
+ vlan->vid, vsi->vsi_num, err);
+
+ return err;
+}
+
+/**
+ * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx
+ * @vsi: the VSI being changed
+ */
+static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ int err;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ /* Here we are configuring the VSI to let the driver add VLAN tags by
+	 * setting inner_vlan_flags to ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL. The
+	 * actual VLAN tag insertion happens in the Tx hot path, in ice_tx_map.
+ */
+ ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL;
+
+ /* Preserve existing VLAN strip setting */
+ ctxt->info.inner_vlan_flags |= (vsi->info.inner_vlan_flags &
+ ICE_AQ_VSI_INNER_VLAN_EMODE_M);
+
+ ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err) {
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ goto out;
+ }
+
+ vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
+out:
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ */
+static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ int err;
+
+ /* do not allow modifying VLAN stripping when a port VLAN is configured
+ * on this VSI
+ */
+ if (vsi->info.port_based_inner_vlan)
+ return 0;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ /* Here we are configuring what the VSI should do with the VLAN tag in
+ * the Rx packet. We can either leave the tag in the packet or put it in
+ * the Rx descriptor.
+ */
+ if (ena)
+ /* Strip VLAN tag from Rx packet and put it in the desc */
+ ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH;
+ else
+ /* Disable stripping. Leave tag in packet */
+ ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
+
+ /* Allow all packets untagged/tagged */
+ ctxt->info.inner_vlan_flags |= ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL;
+
+ ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err) {
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %s\n",
+ ena, err, ice_aq_str(hw->adminq.sq_last_status));
+ goto out;
+ }
+
+ vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
+out:
+ kfree(ctxt);
+ return err;
+}
+
+int ice_vsi_ena_inner_stripping(struct ice_vsi *vsi, const u16 tpid)
+{
+ if (tpid != ETH_P_8021Q) {
+ print_invalid_tpid(vsi, tpid);
+ return -EINVAL;
+ }
+
+ return ice_vsi_manage_vlan_stripping(vsi, true);
+}
+
+int ice_vsi_dis_inner_stripping(struct ice_vsi *vsi)
+{
+ return ice_vsi_manage_vlan_stripping(vsi, false);
+}
+
+int ice_vsi_ena_inner_insertion(struct ice_vsi *vsi, const u16 tpid)
+{
+ if (tpid != ETH_P_8021Q) {
+ print_invalid_tpid(vsi, tpid);
+ return -EINVAL;
+ }
+
+ return ice_vsi_manage_vlan_insertion(vsi);
+}
+
+int ice_vsi_dis_inner_insertion(struct ice_vsi *vsi)
+{
+ return ice_vsi_manage_vlan_insertion(vsi);
+}
+
+/**
+ * __ice_vsi_set_inner_port_vlan - set port VLAN VSI context settings to enable a port VLAN
+ * @vsi: the VSI to update
+ * @pvid_info: VLAN ID and QoS used to set the PVID VSI context field
+ */
+static int __ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, u16 pvid_info)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_aqc_vsi_props *info;
+ struct ice_vsi_ctx *ctxt;
+ int ret;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info = vsi->info;
+ info = &ctxt->info;
+ info->inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTUNTAGGED |
+ ICE_AQ_VSI_INNER_VLAN_INSERT_PVID |
+ ICE_AQ_VSI_INNER_VLAN_EMODE_STR;
+ info->sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+ info->port_based_inner_vlan = cpu_to_le16(pvid_info);
+ info->valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+ ICE_AQ_VSI_PROP_SW_VALID);
+
+ ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (ret) {
+ dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %d aq_err %s\n",
+ ret, ice_aq_str(hw->adminq.sq_last_status));
+ goto out;
+ }
+
+ vsi->info.inner_vlan_flags = info->inner_vlan_flags;
+ vsi->info.sw_flags2 = info->sw_flags2;
+ vsi->info.port_based_inner_vlan = info->port_based_inner_vlan;
+out:
+ kfree(ctxt);
+ return ret;
+}
+
+int ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ u16 port_vlan_info;
+
+ if (vlan->tpid != ETH_P_8021Q)
+ return -EINVAL;
+
+ if (vlan->prio > 7)
+ return -EINVAL;
+
+ port_vlan_info = vlan->vid | (vlan->prio << VLAN_PRIO_SHIFT);
+
+ return __ice_vsi_set_inner_port_vlan(vsi, port_vlan_info);
+}
+
+/**
+ * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
+ * @vsi: VSI to enable or disable VLAN pruning on
+ * @ena: set to true to enable VLAN pruning and false to disable it
+ *
+ * returns 0 if VSI is updated, negative otherwise
+ */
+static int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena)
+{
+ struct ice_vsi_ctx *ctxt;
+ struct ice_pf *pf;
+ int status;
+
+ if (!vsi)
+ return -EINVAL;
+
+ /* Don't enable VLAN pruning if the netdev is currently in promiscuous
+ * mode. VLAN pruning will be enabled when the interface exits
+ * promiscuous mode if any VLAN filters are active.
+ */
+ if (vsi->netdev && vsi->netdev->flags & IFF_PROMISC && ena)
+ return 0;
+
+ pf = vsi->back;
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info = vsi->info;
+
+ if (ena)
+ ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+ else
+ ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+ ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
+
+ status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL);
+ if (status) {
+ netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %s\n",
+ ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, status,
+ ice_aq_str(pf->hw.adminq.sq_last_status));
+ goto err_out;
+ }
+
+ vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+
+ kfree(ctxt);
+ return 0;
+
+err_out:
+ kfree(ctxt);
+ return status;
+}
+
+int ice_vsi_ena_rx_vlan_filtering(struct ice_vsi *vsi)
+{
+ return ice_cfg_vlan_pruning(vsi, true);
+}
+
+int ice_vsi_dis_rx_vlan_filtering(struct ice_vsi *vsi)
+{
+ return ice_cfg_vlan_pruning(vsi, false);
+}
+
+static int ice_cfg_vlan_antispoof(struct ice_vsi *vsi, bool enable)
+{
+ struct ice_vsi_ctx *ctx;
+ int err;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->info.sec_flags = vsi->info.sec_flags;
+ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+
+ if (enable)
+ ctx->info.sec_flags |= ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S;
+ else
+ ctx->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+
+ err = ice_update_vsi(&vsi->back->hw, vsi->idx, ctx, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to configure Tx VLAN anti-spoof %s for VSI %d, error %d\n",
+ enable ? "ON" : "OFF", vsi->vsi_num, err);
+ else
+ vsi->info.sec_flags = ctx->info.sec_flags;
+
+ kfree(ctx);
+
+ return err;
+}
+
+int ice_vsi_ena_tx_vlan_filtering(struct ice_vsi *vsi)
+{
+ return ice_cfg_vlan_antispoof(vsi, true);
+}
+
+int ice_vsi_dis_tx_vlan_filtering(struct ice_vsi *vsi)
+{
+ return ice_cfg_vlan_antispoof(vsi, false);
+}
+
+/**
+ * tpid_to_vsi_outer_vlan_type - convert from TPID to VSI context based tag_type
+ * @tpid: TPID used to translate into the VSI context based tag_type
+ * @tag_type: output variable to hold the VSI context based tag type
+ */
+static int tpid_to_vsi_outer_vlan_type(u16 tpid, u8 *tag_type)
+{
+ switch (tpid) {
+ case ETH_P_8021Q:
+ *tag_type = ICE_AQ_VSI_OUTER_TAG_VLAN_8100;
+ break;
+ case ETH_P_8021AD:
+ *tag_type = ICE_AQ_VSI_OUTER_TAG_STAG;
+ break;
+ case ETH_P_QINQ1:
+ *tag_type = ICE_AQ_VSI_OUTER_TAG_VLAN_9100;
+ break;
+ default:
+ *tag_type = 0;
+ return -EINVAL;
+ }
+
+ return 0;
+}
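+
+/* Example (illustrative): translate the 0x88a8 S-tag TPID before programming
+ * the VSI context:
+ *
+ *	u8 tag_type;
+ *
+ *	if (tpid_to_vsi_outer_vlan_type(ETH_P_8021AD, &tag_type))
+ *		return -EINVAL;
+ *	// tag_type is now ICE_AQ_VSI_OUTER_TAG_STAG
+ */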
+
+/**
+ * ice_vsi_ena_outer_stripping - enable outer VLAN stripping
+ * @vsi: VSI to configure
+ * @tpid: TPID to enable outer VLAN stripping for
+ *
+ * Enable outer VLAN stripping via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Since the VSI context only supports a single TPID for insertion and
+ * stripping, setting the TPID for stripping will affect the TPID for insertion.
+ * Callers need to be aware of this limitation.
+ *
+ * Only modify outer VLAN stripping settings and the VLAN TPID. Outer VLAN
+ * insertion settings are unmodified.
+ *
+ * This enables hardware to strip a VLAN tag with the specified TPID to be
+ * stripped from the packet and placed in the receive descriptor.
+ */
+int ice_vsi_ena_outer_stripping(struct ice_vsi *vsi, u16 tpid)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ u8 tag_type;
+ int err;
+
+ /* do not allow modifying VLAN stripping when a port VLAN is configured
+ * on this VSI
+ */
+ if (vsi->info.port_based_outer_vlan)
+ return 0;
+
+ if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type))
+ return -EINVAL;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+ /* clear current outer VLAN strip settings */
+ ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+ ~(ICE_AQ_VSI_OUTER_VLAN_EMODE_M | ICE_AQ_VSI_OUTER_TAG_TYPE_M);
+ ctxt->info.outer_vlan_flags |=
+ ((ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_BOTH <<
+ ICE_AQ_VSI_OUTER_VLAN_EMODE_S) |
+ ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+ ICE_AQ_VSI_OUTER_TAG_TYPE_M));
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for enabling outer VLAN stripping failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ else
+ vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * ice_vsi_dis_outer_stripping - disable outer VLAN stripping
+ * @vsi: VSI to configure
+ *
+ * Disable outer VLAN stripping via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Only modify the outer VLAN stripping settings. The VLAN TPID and outer VLAN
+ * insertion settings are unmodified.
+ *
+ * This tells the hardware to not strip any VLAN tagged packets, thus leaving
+ * them in the packet. This enables software offloaded VLAN stripping and
+ * disables hardware offloaded VLAN stripping.
+ */
+int ice_vsi_dis_outer_stripping(struct ice_vsi *vsi)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ int err;
+
+ if (vsi->info.port_based_outer_vlan)
+ return 0;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+ /* clear current outer VLAN strip settings */
+ ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+ ~ICE_AQ_VSI_OUTER_VLAN_EMODE_M;
+ ctxt->info.outer_vlan_flags |= ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING <<
+ ICE_AQ_VSI_OUTER_VLAN_EMODE_S;
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for disabling outer VLAN stripping failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ else
+ vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * ice_vsi_ena_outer_insertion - enable outer VLAN insertion
+ * @vsi: VSI to configure
+ * @tpid: TPID to enable outer VLAN insertion for
+ *
+ * Enable outer VLAN insertion via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Since the VSI context only supports a single TPID for insertion and
+ * stripping, setting the TPID for insertion will affect the TPID for stripping.
+ * Callers need to be aware of this limitation.
+ *
+ * Only modify outer VLAN insertion settings and the VLAN TPID. Outer VLAN
+ * stripping settings are unmodified.
+ *
+ * This allows a VLAN tag with the specified TPID to be inserted in the transmit
+ * descriptor.
+ */
+int ice_vsi_ena_outer_insertion(struct ice_vsi *vsi, u16 tpid)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ u8 tag_type;
+ int err;
+
+ if (vsi->info.port_based_outer_vlan)
+ return 0;
+
+ if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type))
+ return -EINVAL;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+ /* clear current outer VLAN insertion settings */
+ ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+ ~(ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT |
+ ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC |
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M |
+ ICE_AQ_VSI_OUTER_TAG_TYPE_M);
+ ctxt->info.outer_vlan_flags |=
+ ((ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL <<
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) &
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M) |
+ ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+ ICE_AQ_VSI_OUTER_TAG_TYPE_M);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for enabling outer VLAN insertion failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ else
+ vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+ kfree(ctxt);
+ return err;
+}
+
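+/* Illustrative note, not driver code: because insertion and stripping share a
+ * single tag_type in the VSI context, a hypothetical caller doing
+ *
+ *	ice_vsi_ena_outer_insertion(vsi, ETH_P_8021AD);
+ *
+ * also retargets outer VLAN stripping, if enabled, to 0x88A8 tags.
+ */
+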
+/**
+ * ice_vsi_dis_outer_insertion - disable outer VLAN insertion
+ * @vsi: VSI to configure
+ *
+ * Disable outer VLAN insertion via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Only modify the outer VLAN insertion settings. The VLAN TPID and outer VLAN
+ * settings are unmodified.
+ *
+ * This tells the hardware not to allow any VLAN tagged packets in the transmit
+ * descriptor. This enables software offloaded VLAN insertion and disables
+ * hardware offloaded VLAN insertion.
+ */
+int ice_vsi_dis_outer_insertion(struct ice_vsi *vsi)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ int err;
+
+ if (vsi->info.port_based_outer_vlan)
+ return 0;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+ /* clear current outer VLAN insertion settings */
+ ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+ ~(ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT |
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M);
+ ctxt->info.outer_vlan_flags |=
+ ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC |
+ ((ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL <<
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) &
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err)
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for disabling outer VLAN insertion failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ else
+ vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * __ice_vsi_set_outer_port_vlan - set the outer port VLAN and related settings
+ * @vsi: VSI to configure
+ * @vlan_info: packed u16 that contains the VLAN prio and ID
+ * @tpid: TPID of the port VLAN
+ *
+ * Set the port VLAN prio, ID, and TPID.
+ *
+ * Enable VLAN pruning so the VSI doesn't receive any traffic that doesn't match
+ * a VLAN prune rule. The caller should take care to add a VLAN prune rule that
+ * matches the port VLAN ID and TPID.
+ *
+ * Tell hardware to strip outer VLAN tagged packets on receive and don't put
+ * them in the receive descriptor. VSI(s) in port VLANs should not be aware of
+ * the port VLAN ID or TPID they are assigned to.
+ *
+ * Tell hardware to prevent outer VLAN tag insertion on transmit and only allow
+ * untagged outer packets from the transmit descriptor.
+ *
+ * Also, tell the hardware to insert the port VLAN on transmit.
+ */
+static int
+__ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, u16 vlan_info, u16 tpid)
+{
+ struct ice_hw *hw = &vsi->back->hw;
+ struct ice_vsi_ctx *ctxt;
+ u8 tag_type;
+ int err;
+
+ if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type))
+ return -EINVAL;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return -ENOMEM;
+
+ ctxt->info = vsi->info;
+
+ ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+ ctxt->info.port_based_outer_vlan = cpu_to_le16(vlan_info);
+ ctxt->info.outer_vlan_flags =
+ (ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW <<
+ ICE_AQ_VSI_OUTER_VLAN_EMODE_S) |
+ ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+ ICE_AQ_VSI_OUTER_TAG_TYPE_M) |
+ ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC |
+ (ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTUNTAGGED <<
+ ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) |
+ ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID |
+ ICE_AQ_VSI_PROP_SW_VALID);
+
+ err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (err) {
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for setting outer port based VLAN failed, err %d aq_err %s\n",
+ err, ice_aq_str(hw->adminq.sq_last_status));
+ } else {
+ vsi->info.port_based_outer_vlan = ctxt->info.port_based_outer_vlan;
+ vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+ vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+ }
+
+ kfree(ctxt);
+ return err;
+}
+
+/**
+ * ice_vsi_set_outer_port_vlan - public version of __ice_vsi_set_outer_port_vlan
+ * @vsi: VSI to configure
+ * @vlan: ice_vlan structure used to set the port VLAN
+ *
+ * Set the outer port VLAN via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * This function does not support clearing the port VLAN as there is currently
+ * no use case for this.
+ *
+ * Use the ice_vlan structure passed in to set this VSI in a port VLAN.
+ */
+int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+ u16 port_vlan_info;
+
+ if (vlan->prio > (VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT))
+ return -EINVAL;
+
+ port_vlan_info = vlan->vid | (vlan->prio << VLAN_PRIO_SHIFT);
+
+ return __ice_vsi_set_outer_port_vlan(vsi, port_vlan_info, vlan->tpid);
+}
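+
+/* Worked example, using the standard VLAN_PRIO_SHIFT of 13 from
+ * <linux/if_vlan.h>: for VID 100 and priority 5 the packed u16 is
+ *
+ *	(5 << 13) | 100 == 0xA000 | 0x0064 == 0xA064
+ *
+ * which is what __ice_vsi_set_outer_port_vlan() receives as vlan_info.
+ */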
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h
new file mode 100644
index 000000000..f45990949
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VSI_VLAN_LIB_H_
+#define _ICE_VSI_VLAN_LIB_H_
+
+#include <linux/types.h>
+#include "ice_vlan.h"
+
+struct ice_vsi;
+
+int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+int ice_vsi_del_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+
+int ice_vsi_ena_inner_stripping(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_inner_stripping(struct ice_vsi *vsi);
+int ice_vsi_ena_inner_insertion(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_inner_insertion(struct ice_vsi *vsi);
+int ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+
+int ice_vsi_ena_rx_vlan_filtering(struct ice_vsi *vsi);
+int ice_vsi_dis_rx_vlan_filtering(struct ice_vsi *vsi);
+int ice_vsi_ena_tx_vlan_filtering(struct ice_vsi *vsi);
+int ice_vsi_dis_tx_vlan_filtering(struct ice_vsi *vsi);
+
+int ice_vsi_ena_outer_stripping(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_outer_stripping(struct ice_vsi *vsi);
+int ice_vsi_ena_outer_insertion(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_outer_insertion(struct ice_vsi *vsi);
+int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+
+#endif /* _ICE_VSI_VLAN_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c
new file mode 100644
index 000000000..4a6c850d8
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_pf_vsi_vlan_ops.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_lib.h"
+#include "ice.h"
+
+static int
+op_unsupported_vlan_arg(struct ice_vsi * __always_unused vsi,
+ struct ice_vlan * __always_unused vlan)
+{
+ return -EOPNOTSUPP;
+}
+
+static int
+op_unsupported_tpid_arg(struct ice_vsi *__always_unused vsi,
+ u16 __always_unused tpid)
+{
+ return -EOPNOTSUPP;
+}
+
+static int op_unsupported(struct ice_vsi *__always_unused vsi)
+{
+ return -EOPNOTSUPP;
+}
+
+/* If any new ops are added to the VSI VLAN ops interface then an unsupported
+ * implementation should be set here.
+ */
+static struct ice_vsi_vlan_ops ops_unsupported = {
+ .add_vlan = op_unsupported_vlan_arg,
+ .del_vlan = op_unsupported_vlan_arg,
+ .ena_stripping = op_unsupported_tpid_arg,
+ .dis_stripping = op_unsupported,
+ .ena_insertion = op_unsupported_tpid_arg,
+ .dis_insertion = op_unsupported,
+ .ena_rx_filtering = op_unsupported,
+ .dis_rx_filtering = op_unsupported,
+ .ena_tx_filtering = op_unsupported,
+ .dis_tx_filtering = op_unsupported,
+ .set_port_vlan = op_unsupported_vlan_arg,
+};
+
+/**
+ * ice_vsi_init_unsupported_vlan_ops - init all VSI VLAN ops to unsupported
+ * @vsi: VSI to initialize VSI VLAN ops to unsupported for
+ *
+ * By default all inner and outer VSI VLAN ops return -EOPNOTSUPP. This was
+ * done as opposed to leaving the ops NULL, to prevent unexpected crashes.
+ * Instead, if an unsupported VSI VLAN op is called, it will just return
+ * -EOPNOTSUPP.
+ */
+static void ice_vsi_init_unsupported_vlan_ops(struct ice_vsi *vsi)
+{
+ vsi->outer_vlan_ops = ops_unsupported;
+ vsi->inner_vlan_ops = ops_unsupported;
+}
+
+/**
+ * ice_vsi_init_vlan_ops - initialize type specific VSI VLAN ops
+ * @vsi: VSI to initialize ops for
+ *
+ * If any VSI types are added and/or require different ops than the PF or VF
+ * VSI, then a case will have to be added here to handle that. Also, VSI type
+ * specific files should be added in the same manner that was done for the PF
+ * VSI.
+ */
+void ice_vsi_init_vlan_ops(struct ice_vsi *vsi)
+{
+ /* Initialize all VSI types to have unsupported VSI VLAN ops */
+ ice_vsi_init_unsupported_vlan_ops(vsi);
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ case ICE_VSI_SWITCHDEV_CTRL:
+ ice_pf_vsi_init_vlan_ops(vsi);
+ break;
+ case ICE_VSI_VF:
+ ice_vf_vsi_init_vlan_ops(vsi);
+ break;
+ default:
+ dev_dbg(ice_pf_to_dev(vsi->back), "%s does not support VLAN operations\n",
+ ice_vsi_type_str(vsi->type));
+ break;
+ }
+}
+
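+/* Hypothetical sketch of how a new VSI type would hook in, per the comment
+ * above; ICE_VSI_FOO and ice_foo_vsi_init_vlan_ops() are made-up names:
+ *
+ *	case ICE_VSI_FOO:
+ *		ice_foo_vsi_init_vlan_ops(vsi);
+ *		break;
+ *
+ * where the init function overrides only the ops that the type supports.
+ */
+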
+/**
+ * ice_get_compat_vsi_vlan_ops - Get VSI VLAN ops based on VLAN mode
+ * @vsi: VSI used to get the VSI VLAN ops
+ *
+ * This function is meant to be used when the caller doesn't know which VLAN ops
+ * to use (i.e. inner or outer). This allows backward compatibility for VLANs
+ * since most of the outer VSI VLAN functions are not supported when
+ * the device is configured in Single VLAN Mode (SVM).
+ */
+struct ice_vsi_vlan_ops *ice_get_compat_vsi_vlan_ops(struct ice_vsi *vsi)
+{
+ if (ice_is_dvm_ena(&vsi->back->hw))
+ return &vsi->outer_vlan_ops;
+ else
+ return &vsi->inner_vlan_ops;
+}
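+
+/* Minimal usage sketch, not a specific call site in this driver: callers that
+ * should work in both SVM and DVM fetch the compat ops and go through the
+ * function pointers instead of calling the lib functions directly:
+ *
+ *	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+ *	int err = vlan_ops->ena_stripping(vsi, ETH_P_8021Q);
+ */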
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h
new file mode 100644
index 000000000..5b47568f6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VSI_VLAN_OPS_H_
+#define _ICE_VSI_VLAN_OPS_H_
+
+#include "ice_type.h"
+#include "ice_vsi_vlan_lib.h"
+
+struct ice_vsi;
+
+struct ice_vsi_vlan_ops {
+ int (*add_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan);
+ int (*del_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan);
+ int (*ena_stripping)(struct ice_vsi *vsi, const u16 tpid);
+ int (*dis_stripping)(struct ice_vsi *vsi);
+ int (*ena_insertion)(struct ice_vsi *vsi, const u16 tpid);
+ int (*dis_insertion)(struct ice_vsi *vsi);
+ int (*ena_rx_filtering)(struct ice_vsi *vsi);
+ int (*dis_rx_filtering)(struct ice_vsi *vsi);
+ int (*ena_tx_filtering)(struct ice_vsi *vsi);
+ int (*dis_tx_filtering)(struct ice_vsi *vsi);
+ int (*set_port_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan);
+};
+
+void ice_vsi_init_vlan_ops(struct ice_vsi *vsi);
+struct ice_vsi_vlan_ops *ice_get_compat_vsi_vlan_ops(struct ice_vsi *vsi);
+
+#endif /* _ICE_VSI_VLAN_OPS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
new file mode 100644
index 000000000..41ee081eb
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -0,0 +1,1073 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_type.h"
+#include "ice_xsk.h"
+#include "ice_txrx.h"
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+
+static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx)
+{
+ return &rx_ring->xdp_buf[idx];
+}
+
+/**
+ * ice_qp_reset_stats - Resets all stats for the rings of a given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
+{
+ memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
+ sizeof(vsi->rx_rings[q_idx]->rx_stats));
+ memset(&vsi->tx_rings[q_idx]->stats, 0,
+ sizeof(vsi->tx_rings[q_idx]->stats));
+ if (ice_is_xdp_ena_vsi(vsi))
+ memset(&vsi->xdp_rings[q_idx]->stats, 0,
+ sizeof(vsi->xdp_rings[q_idx]->stats));
+}
+
+/**
+ * ice_qp_clean_rings - Cleans all the rings of a given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
+{
+ ice_clean_tx_ring(vsi->tx_rings[q_idx]);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ synchronize_rcu();
+ ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+ }
+ ice_clean_rx_ring(vsi->rx_rings[q_idx]);
+}
+
+/**
+ * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
+ * @vsi: VSI that has netdev
+ * @q_vector: q_vector that has NAPI context
+ * @enable: true for enable, false for disable
+ */
+static void
+ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
+ bool enable)
+{
+ if (!vsi->netdev || !q_vector)
+ return;
+
+ if (enable)
+ napi_enable(&q_vector->napi);
+ else
+ napi_disable(&q_vector->napi);
+}
+
+/**
+ * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
+ * @vsi: the VSI that contains queue vector being un-configured
+ * @rx_ring: Rx ring that will have its IRQ disabled
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring,
+ struct ice_q_vector *q_vector)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int base = vsi->base_vector;
+ u16 reg;
+ u32 val;
+
+	/* QINT_TQCTL is cleared in ice_vsi_stop_tx_ring(), so only
+	 * QINT_RQCTL needs to be handled here
+	 */
+ reg = rx_ring->reg_idx;
+ val = rd32(hw, QINT_RQCTL(reg));
+ val &= ~QINT_RQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_RQCTL(reg), val);
+
+ if (q_vector) {
+ u16 v_idx = q_vector->v_idx;
+
+ wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
+ ice_flush(hw);
+ synchronize_irq(pf->msix_entries[v_idx + base].vector);
+ }
+}
+
+/**
+ * ice_qvec_cfg_msix - Configure MSI-X and ITR for a given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+ u16 reg_idx = q_vector->reg_idx;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+
+ ice_cfg_itr(hw, q_vector);
+
+ ice_for_each_tx_ring(tx_ring, q_vector->tx)
+ ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx,
+ q_vector->tx.itr_idx);
+
+ ice_for_each_rx_ring(rx_ring, q_vector->rx)
+ ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx,
+ q_vector->rx.itr_idx);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_qvec_ena_irq - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+
+ ice_irq_dynamic_ena(hw, vsi, q_vector);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_qp_dis - Disables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+{
+ struct ice_txq_meta txq_meta = { };
+ struct ice_q_vector *q_vector;
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+ int timeout = 50;
+ int err;
+
+ if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+ return -EINVAL;
+
+ tx_ring = vsi->tx_rings[q_idx];
+ rx_ring = vsi->rx_rings[q_idx];
+ q_vector = rx_ring->q_vector;
+
+ while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) {
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ usleep_range(1000, 2000);
+ }
+ netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+ ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+ if (err)
+ return err;
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+ memset(&txq_meta, 0, sizeof(txq_meta));
+ ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
+ &txq_meta);
+ if (err)
+ return err;
+ }
+ ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+
+ err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
+ if (err)
+ return err;
+
+ ice_qvec_toggle_napi(vsi, q_vector, false);
+ ice_qp_clean_rings(vsi, q_idx);
+ ice_qp_reset_stats(vsi, q_idx);
+
+ return 0;
+}
+
+/**
+ * ice_qp_ena - Enables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
+{
+ struct ice_aqc_add_tx_qgrp *qg_buf;
+ struct ice_q_vector *q_vector;
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+ u16 size;
+ int err;
+
+ if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+ return -EINVAL;
+
+ size = struct_size(qg_buf, txqs, 1);
+ qg_buf = kzalloc(size, GFP_KERNEL);
+ if (!qg_buf)
+ return -ENOMEM;
+
+ qg_buf->num_txqs = 1;
+
+ tx_ring = vsi->tx_rings[q_idx];
+ rx_ring = vsi->rx_rings[q_idx];
+ q_vector = rx_ring->q_vector;
+
+ err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
+ if (err)
+ goto free_buf;
+
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+ memset(qg_buf, 0, size);
+ qg_buf->num_txqs = 1;
+ err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
+ if (err)
+ goto free_buf;
+ ice_set_ring_xdp(xdp_ring);
+ ice_tx_xsk_pool(vsi, q_idx);
+ }
+
+ err = ice_vsi_cfg_rxq(rx_ring);
+ if (err)
+ goto free_buf;
+
+ ice_qvec_cfg_msix(vsi, q_vector);
+
+ err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true);
+ if (err)
+ goto free_buf;
+
+ clear_bit(ICE_CFG_BUSY, vsi->state);
+ ice_qvec_toggle_napi(vsi, q_vector, true);
+ ice_qvec_ena_irq(vsi, q_vector);
+
+ netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+free_buf:
+ kfree(qg_buf);
+ return err;
+}
+
+/**
+ * ice_xsk_pool_disable - disable a buffer pool region
+ * @vsi: Current VSI
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
+{
+ struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);
+
+ if (!pool)
+ return -EINVAL;
+
+ clear_bit(qid, vsi->af_xdp_zc_qps);
+ xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
+
+ return 0;
+}
+
+/**
+ * ice_xsk_pool_enable - enable a buffer pool region
+ * @vsi: Current VSI
+ * @pool: pointer to a requested buffer pool region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+{
+ int err;
+
+ if (vsi->type != ICE_VSI_PF)
+ return -EINVAL;
+
+ if (qid >= vsi->netdev->real_num_rx_queues ||
+ qid >= vsi->netdev->real_num_tx_queues)
+ return -EINVAL;
+
+ err = xsk_pool_dma_map(pool, ice_pf_to_dev(vsi->back),
+ ICE_RX_DMA_ATTR);
+ if (err)
+ return err;
+
+ set_bit(qid, vsi->af_xdp_zc_qps);
+
+ return 0;
+}
+
+/**
+ * ice_realloc_rx_xdp_bufs - reallocate for either XSK or normal buffer
+ * @rx_ring: Rx ring
+ * @pool_present: is pool for XSK present
+ *
+ * Try allocating memory and return ENOMEM, if failed to allocate.
+ * If allocation was successful, substitute buffer with allocated one.
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present)
+{
+ size_t elem_size = pool_present ? sizeof(*rx_ring->xdp_buf) :
+ sizeof(*rx_ring->rx_buf);
+ void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
+
+ if (!sw_ring)
+ return -ENOMEM;
+
+ if (pool_present) {
+ kfree(rx_ring->rx_buf);
+ rx_ring->rx_buf = NULL;
+ rx_ring->xdp_buf = sw_ring;
+ } else {
+ kfree(rx_ring->xdp_buf);
+ rx_ring->xdp_buf = NULL;
+ rx_ring->rx_buf = sw_ring;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_realloc_zc_buf - reallocate XDP ZC queue pairs
+ * @vsi: Current VSI
+ * @zc: is zero copy set
+ *
+ * Reallocate buffers for the rx_rings that might be used by XSK.
+ * XDP requires more memory than rx_buf provides.
+ * Returns 0 on success, negative on failure
+ */
+int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc)
+{
+ struct ice_rx_ring *rx_ring;
+ unsigned long q;
+
+ for_each_set_bit(q, vsi->af_xdp_zc_qps,
+ max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) {
+ rx_ring = vsi->rx_rings[q];
+ if (ice_realloc_rx_xdp_bufs(rx_ring, zc))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state
+ * @vsi: Current VSI
+ * @pool: buffer pool to enable/associate to a ring, NULL to disable
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+{
+ bool if_running, pool_present = !!pool;
+ int ret = 0, pool_failure = 0;
+
+ if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
+ netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
+ pool_failure = -EINVAL;
+ goto failure;
+ }
+
+ if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+
+ if (if_running) {
+ struct ice_rx_ring *rx_ring = vsi->rx_rings[qid];
+
+ ret = ice_qp_dis(vsi, qid);
+ if (ret) {
+ netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret);
+ goto xsk_pool_if_up;
+ }
+
+ ret = ice_realloc_rx_xdp_bufs(rx_ring, pool_present);
+ if (ret)
+ goto xsk_pool_if_up;
+ }
+
+ pool_failure = pool_present ? ice_xsk_pool_enable(vsi, pool, qid) :
+ ice_xsk_pool_disable(vsi, qid);
+
+xsk_pool_if_up:
+ if (if_running) {
+ ret = ice_qp_ena(vsi, qid);
+ if (!ret && pool_present)
+ napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi);
+ else if (ret)
+ netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
+ }
+
+failure:
+ if (pool_failure) {
+ netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
+ pool_present ? "en" : "dis", pool_failure);
+ return pool_failure;
+ }
+
+ return ret;
+}
+
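+/* Assumed call path, following the usual ndo_bpf dispatch pattern: the
+ * driver's XDP handler forwards pool setup requests here, roughly as
+ *
+ *	case XDP_SETUP_XSK_POOL:
+ *		return ice_xsk_pool_setup(vsi, xdp->xsk.pool,
+ *					  xdp->xsk.queue_id);
+ */
+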
+/**
+ * ice_fill_rx_descs - pick buffers from the XSK buffer pool and use them
+ * @pool: XSK Buffer pool to pull the buffers from
+ * @xdp: SW ring of xdp_buff that will hold the buffers
+ * @rx_desc: Pointer to Rx descriptors that will be filled
+ * @count: The number of buffers to allocate
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Note that ring wrap should be handled by the caller of this function.
+ *
+ * Returns the number of allocated Rx descriptors
+ */
+static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+ union ice_32b_rx_flex_desc *rx_desc, u16 count)
+{
+ dma_addr_t dma;
+ u16 buffs;
+ int i;
+
+ buffs = xsk_buff_alloc_batch(pool, xdp, count);
+ for (i = 0; i < buffs; i++) {
+ dma = xsk_buff_xdp_get_dma(*xdp);
+ rx_desc->read.pkt_addr = cpu_to_le64(dma);
+ rx_desc->wb.status_error0 = 0;
+
+ rx_desc++;
+ xdp++;
+ }
+
+ return buffs;
+}
+
+/**
+ * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * Place @count descriptors onto the Rx ring. Handle the ring wrap for the
+ * case where the space from next_to_use up to the end of the ring is less
+ * than @count. Finally, do a tail bump.
+ *
+ * Returns true if all allocations were successful, false if any fail.
+ */
+static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
+{
+ u32 nb_buffs_extra = 0, nb_buffs = 0;
+ union ice_32b_rx_flex_desc *rx_desc;
+ u16 ntu = rx_ring->next_to_use;
+ u16 total_count = count;
+ struct xdp_buff **xdp;
+
+ rx_desc = ICE_RX_DESC(rx_ring, ntu);
+ xdp = ice_xdp_buf(rx_ring, ntu);
+
+ if (ntu + count >= rx_ring->count) {
+ nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
+ rx_desc,
+ rx_ring->count - ntu);
+ if (nb_buffs_extra != rx_ring->count - ntu) {
+ ntu += nb_buffs_extra;
+ goto exit;
+ }
+ rx_desc = ICE_RX_DESC(rx_ring, 0);
+ xdp = ice_xdp_buf(rx_ring, 0);
+ ntu = 0;
+ count -= nb_buffs_extra;
+ ice_release_rx_desc(rx_ring, 0);
+ }
+
+ nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);
+
+ ntu += nb_buffs;
+ if (ntu == rx_ring->count)
+ ntu = 0;
+
+exit:
+ if (rx_ring->next_to_use != ntu)
+ ice_release_rx_desc(rx_ring, ntu);
+
+ return total_count == (nb_buffs_extra + nb_buffs);
+}
+
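+/* Worked example of the wrap handling above: with a 512-descriptor ring,
+ * ntu == 500 and count == 30, the first ice_fill_rx_descs() call asks for
+ * 512 - 500 == 12 buffers; if all 12 are placed, ntu resets to 0 and the
+ * second call places the remaining 18.
+ */
+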
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * Wrapper for internal allocation routine; figure out how many tail
+ * bumps should take place based on the given threshold
+ *
+ * Returns true if all calls to internal alloc routine succeeded
+ */
+bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
+{
+ u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
+ u16 leftover, i, tail_bumps;
+
+ tail_bumps = count / rx_thresh;
+ leftover = count - (tail_bumps * rx_thresh);
+
+ for (i = 0; i < tail_bumps; i++)
+ if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
+ return false;
+ return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
+}
+
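+/* Worked example: on a 512-descriptor ring the quarter threshold is 128, so
+ * count == 300 gives tail_bumps == 2 (two full 128-buffer fills, each ending
+ * in a tail bump) plus a final call for the leftover 44 buffers.
+ */
+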
+/**
+ * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
+ * @rx_ring: Rx ring
+ */
+static void ice_bump_ntc(struct ice_rx_ring *rx_ring)
+{
+ int ntc = rx_ring->next_to_clean + 1;
+
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+ prefetch(ICE_RX_DESC(rx_ring, ntc));
+}
+
+/**
+ * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
+ * @rx_ring: Rx ring
+ * @xdp: Pointer to XDP buffer
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb on success, NULL on failure.
+ */
+static struct sk_buff *
+ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
+{
+ unsigned int totalsize = xdp->data_end - xdp->data_meta;
+ unsigned int metasize = xdp->data - xdp->data_meta;
+ struct sk_buff *skb;
+
+ net_prefetch(xdp->data_meta);
+
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ return NULL;
+
+ memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+ ALIGN(totalsize, sizeof(long)));
+
+ if (metasize) {
+ skb_metadata_set(skb, metasize);
+ __skb_pull(skb, metasize);
+ }
+
+ xsk_buff_free(xdp);
+ return skb;
+}
+
+/**
+ * ice_run_xdp_zc - Executes an XDP program in zero-copy path
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ * @xdp_ring: ring to be used for XDP_TX action
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
+{
+ int err, result = ICE_XDP_PASS;
+ u32 act;
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+
+ if (likely(act == XDP_REDIRECT)) {
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ if (!err)
+ return ICE_XDP_REDIR;
+ if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
+ result = ICE_XDP_EXIT;
+ else
+ result = ICE_XDP_CONSUMED;
+ goto out_failure;
+ }
+
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ result = ice_xmit_xdp_buff(xdp, xdp_ring);
+ if (result == ICE_XDP_CONSUMED)
+ goto out_failure;
+ break;
+ case XDP_DROP:
+ result = ICE_XDP_CONSUMED;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+ result = ICE_XDP_CONSUMED;
+out_failure:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ break;
+ }
+
+ return result;
+}
+
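+/* Minimal XDP program an AF_XDP application might load to reach this path;
+ * illustrative userspace-built BPF, not part of the driver, assuming an
+ * XSKMAP named xsks_map:
+ *
+ *	SEC("xdp")
+ *	int xsk_redir(struct xdp_md *ctx)
+ *	{
+ *		return bpf_redirect_map(&xsks_map, ctx->rx_queue_index,
+ *					XDP_PASS);
+ *	}
+ */
+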
+/**
+ * ice_clean_rx_irq_zc - consumes packets from the hardware ring
+ * @rx_ring: AF_XDP Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns number of processed packets on success, remaining budget on failure.
+ */
+int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
+{
+ unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ struct ice_tx_ring *xdp_ring;
+ unsigned int xdp_xmit = 0;
+ struct bpf_prog *xdp_prog;
+ bool failure = false;
+ int entries_to_alloc;
+
+	/* The ZC path is enabled only when an XDP program is set,
+	 * so here it cannot be NULL
+	 */
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+ xdp_ring = rx_ring->xdp_ring;
+
+ while (likely(total_rx_packets < (unsigned int)budget)) {
+ union ice_32b_rx_flex_desc *rx_desc;
+ unsigned int size, xdp_res = 0;
+ struct xdp_buff *xdp;
+ struct sk_buff *skb;
+ u16 stat_err_bits;
+ u16 vlan_tag = 0;
+ u16 rx_ptype;
+
+ rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+ if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we have
+ * verified the descriptor has been written back.
+ */
+ dma_rmb();
+
+ if (unlikely(rx_ring->next_to_clean == rx_ring->next_to_use))
+ break;
+
+ xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean);
+
+ size = le16_to_cpu(rx_desc->wb.pkt_len) &
+ ICE_RX_FLX_DESC_PKT_LEN_M;
+ if (!size) {
+ xdp->data = NULL;
+ xdp->data_end = NULL;
+ xdp->data_hard_start = NULL;
+ xdp->data_meta = NULL;
+ goto construct_skb;
+ }
+
+ xsk_buff_set_size(xdp, size);
+ xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool);
+
+ xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring);
+ if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
+ xdp_xmit |= xdp_res;
+ } else if (xdp_res == ICE_XDP_EXIT) {
+ failure = true;
+ break;
+ } else if (xdp_res == ICE_XDP_CONSUMED) {
+ xsk_buff_free(xdp);
+ } else if (xdp_res == ICE_XDP_PASS) {
+ goto construct_skb;
+ }
+
+ total_rx_bytes += size;
+ total_rx_packets++;
+
+ ice_bump_ntc(rx_ring);
+ continue;
+
+construct_skb:
+ /* XDP_PASS path */
+ skb = ice_construct_skb_zc(rx_ring, xdp);
+ if (!skb) {
+ rx_ring->rx_stats.alloc_buf_failed++;
+ break;
+ }
+
+ ice_bump_ntc(rx_ring);
+
+ if (eth_skb_pad(skb)) {
+ skb = NULL;
+ continue;
+ }
+
+ total_rx_bytes += skb->len;
+ total_rx_packets++;
+
+ vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
+
+ rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+ ICE_RX_FLEX_DESC_PTYPE_M;
+
+ ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+ ice_receive_skb(rx_ring, skb, vlan_tag);
+ }
+
+ entries_to_alloc = ICE_DESC_UNUSED(rx_ring);
+ if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
+ failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc);
+
+ ice_finalize_xdp_rx(xdp_ring, xdp_xmit);
+ ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
+
+ if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
+ if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+ xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
+ else
+ xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
+
+ return (int)total_rx_packets;
+ }
+
+ return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
+ * @xdp_ring: XDP Tx ring
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+ page_frag_free(tx_buf->raw_buf);
+ xdp_ring->xdp_tx_active--;
+ dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
+ * @xdp_ring: XDP Tx ring
+ */
+static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
+{
+ u16 ntc = xdp_ring->next_to_clean;
+ struct ice_tx_desc *tx_desc;
+ u16 cnt = xdp_ring->count;
+ struct ice_tx_buf *tx_buf;
+ u16 completed_frames = 0;
+ u16 xsk_frames = 0;
+ u16 last_rs;
+ int i;
+
+ last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1;
+ tx_desc = ICE_TX_DESC(xdp_ring, last_rs);
+ if ((tx_desc->cmd_type_offset_bsz &
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) {
+ if (last_rs >= ntc)
+ completed_frames = last_rs - ntc + 1;
+ else
+ completed_frames = last_rs + cnt - ntc + 1;
+ }
+
+ if (!completed_frames)
+ return;
+
+ if (likely(!xdp_ring->xdp_tx_active)) {
+ xsk_frames = completed_frames;
+ goto skip;
+ }
+
+ ntc = xdp_ring->next_to_clean;
+ for (i = 0; i < completed_frames; i++) {
+ tx_buf = &xdp_ring->tx_buf[ntc];
+
+ if (tx_buf->raw_buf) {
+ ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+ tx_buf->raw_buf = NULL;
+ } else {
+ xsk_frames++;
+ }
+
+ ntc++;
+ if (ntc >= xdp_ring->count)
+ ntc = 0;
+ }
+skip:
+ tx_desc->cmd_type_offset_bsz = 0;
+ xdp_ring->next_to_clean += completed_frames;
+ if (xdp_ring->next_to_clean >= cnt)
+ xdp_ring->next_to_clean -= cnt;
+ if (xsk_frames)
+ xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+}
+
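+/* Worked example of the completed_frames math above: with cnt == 256,
+ * ntc == 250 and the DD bit set on the descriptor at last_rs == 5, the span
+ * wraps, so completed_frames == last_rs + cnt - ntc + 1 == 12.
+ */
+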
+/**
+ * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
+ * @xdp_ring: XDP ring to produce the HW Tx descriptor on
+ * @desc: AF_XDP descriptor to pull the DMA address and length from
+ * @total_bytes: bytes accumulator that will be used for stats update
+ */
+static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
+ unsigned int *total_bytes)
+{
+ struct ice_tx_desc *tx_desc;
+ dma_addr_t dma;
+
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);
+
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+ 0, desc->len, 0);
+
+ *total_bytes += desc->len;
+}
+
+/**
+ * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @total_bytes: bytes accumulator that will be used for stats update
+ */
+static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+ unsigned int *total_bytes)
+{
+ u16 ntu = xdp_ring->next_to_use;
+ struct ice_tx_desc *tx_desc;
+ u32 i;
+
+ loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
+ dma_addr_t dma;
+
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);
+
+ tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+ 0, descs[i].len, 0);
+
+ *total_bytes += descs[i].len;
+ }
+
+ xdp_ring->next_to_use = ntu;
+}
+
+/**
+ * ice_fill_tx_hw_ring - produce a given number of Tx descriptors onto the ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @nb_pkts: count of packets to be sent
+ * @total_bytes: bytes accumulator that will be used for stats update
+ */
+static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+ u32 nb_pkts, unsigned int *total_bytes)
+{
+ u32 batched, leftover, i;
+
+ batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
+ leftover = nb_pkts & (PKTS_PER_BATCH - 1);
+ for (i = 0; i < batched; i += PKTS_PER_BATCH)
+ ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
+ for (; i < batched + leftover; i++)
+ ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
+}
+
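+/* Worked example: nb_pkts == 27 with PKTS_PER_BATCH == 8 gives
+ * batched == ALIGN_DOWN(27, 8) == 24 (three unrolled batches) and
+ * leftover == 27 & 7 == 3 singles; the mask form of the leftover computation
+ * relies on PKTS_PER_BATCH being a power of two.
+ */
+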
+/**
+ * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ */
+static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring)
+{
+ u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
+ struct ice_tx_desc *tx_desc;
+
+ tx_desc = ICE_TX_DESC(xdp_ring, ntu);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+}
+
+/**
+ * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ *
+ * Returns true if there is no more work that needs to be done, false otherwise
+ */
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring)
+{
+ struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
+ u32 nb_pkts, nb_processed = 0;
+ unsigned int total_bytes = 0;
+ int budget;
+
+ ice_clean_xdp_irq_zc(xdp_ring);
+
+ budget = ICE_DESC_UNUSED(xdp_ring);
+ budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring));
+
+ nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
+ if (!nb_pkts)
+ return true;
+
+ if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
+ nb_processed = xdp_ring->count - xdp_ring->next_to_use;
+ ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
+ xdp_ring->next_to_use = 0;
+ }
+
+ ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
+ &total_bytes);
+
+ ice_set_rs_bit(xdp_ring);
+ ice_xdp_ring_update_tail(xdp_ring);
+ ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
+
+ if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
+ xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
+
+ return nb_pkts < budget;
+}
+
+/**
+ * ice_xsk_wakeup - Implements ndo_xsk_wakeup
+ * @netdev: net_device
+ * @queue_id: queue to wake up
+ * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
+ *
+ * Returns negative on error, zero otherwise.
+ */
+int
+ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
+ u32 __always_unused flags)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_q_vector *q_vector;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_tx_ring *ring;
+
+ if (test_bit(ICE_VSI_DOWN, vsi->state))
+ return -ENETDOWN;
+
+ if (!ice_is_xdp_ena_vsi(vsi))
+ return -EINVAL;
+
+ if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq)
+ return -EINVAL;
+
+ ring = vsi->rx_rings[queue_id]->xdp_ring;
+
+ if (!ring->xsk_pool)
+ return -EINVAL;
+
+ /* The idea here is that if NAPI is running, mark a miss, so
+ * it will run again. If not, trigger an interrupt and
+ * schedule the NAPI from interrupt context. If NAPI would be
+ * scheduled here, the interrupt affinity would not be
+ * honored.
+ */
+ q_vector = ring->q_vector;
+ if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+ ice_trigger_sw_intr(&vsi->back->hw, q_vector);
+
+ return 0;
+}
+
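+/* Userspace counterpart, for illustration only: with the need_wakeup feature,
+ * an AF_XDP application using the libbpf/libxdp XSK API kicks the kernel
+ * after producing Tx descriptors, which ends up in this ndo_xsk_wakeup:
+ *
+ *	if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+ *		sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT,
+ *		       NULL, 0);
+ */
+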
+/**
+ * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP buff pool attached
+ * @vsi: VSI to be checked
+ *
+ * Returns true if any of the Rx rings has an AF_XDP buff pool attached
+ */
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
+{
+ int i;
+
+ ice_for_each_rxq(vsi, i) {
+ if (xsk_get_pool_from_qid(vsi->netdev, i))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring
+ * @rx_ring: ring to be cleaned
+ */
+void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
+{
+ u16 ntc = rx_ring->next_to_clean;
+ u16 ntu = rx_ring->next_to_use;
+
+ while (ntc != ntu) {
+ struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);
+
+ xsk_buff_free(xdp);
+ ntc++;
+ if (ntc >= rx_ring->count)
+ ntc = 0;
+ }
+}
+
+/**
+ * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues
+ * @xdp_ring: XDP_Tx ring
+ */
+void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring)
+{
+ u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
+ u32 xsk_frames = 0;
+
+ while (ntc != ntu) {
+ struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
+
+ if (tx_buf->raw_buf)
+ ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+ else
+ xsk_frames++;
+
+ tx_buf->raw_buf = NULL;
+
+ ntc++;
+ if (ntc >= xdp_ring->count)
+ ntc = 0;
+ }
+
+ if (xsk_frames)
+ xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
new file mode 100644
index 000000000..6fa181f08
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_XSK_H_
+#define _ICE_XSK_H_
+#include "ice_txrx.h"
+
+#define PKTS_PER_BATCH 8
+
+#ifdef __clang__
+#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for
+#elif __GNUC__ >= 8
+#define loop_unrolled_for _Pragma("GCC unroll 8") for
+#else
+#define loop_unrolled_for for
+#endif
+
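+/* Illustrative expansion of the macro above: under clang,
+ *
+ *	loop_unrolled_for (i = 0; i < PKTS_PER_BATCH; i++) { ... }
+ *
+ * preprocesses to
+ *
+ *	_Pragma("clang loop unroll_count(8)") for (i = 0; i < PKTS_PER_BATCH; i++) { ... }
+ *
+ * i.e. a plain for loop carrying an unroll hint the optimizer may honor.
+ */
+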
+struct ice_vsi;
+
+#ifdef CONFIG_XDP_SOCKETS
+int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool,
+ u16 qid);
+int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget);
+int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
+bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count);
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
+void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring);
+void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring);
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring);
+int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc);
+#else
+static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring)
+{
+ return false;
+}
+
+static inline int
+ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi,
+ struct xsk_buff_pool __always_unused *pool,
+ u16 __always_unused qid)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring,
+ int __always_unused budget)
+{
+ return 0;
+}
+
+static inline bool
+ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring,
+ u16 __always_unused count)
+{
+ return false;
+}
+
+static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi)
+{
+ return false;
+}
+
+static inline int
+ice_xsk_wakeup(struct net_device __always_unused *netdev,
+ u32 __always_unused queue_id, u32 __always_unused flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { }
+static inline void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { }
+
+static inline int
+ice_realloc_zc_buf(struct ice_vsi __always_unused *vsi,
+ bool __always_unused zc)
+{
+ return 0;
+}
+#endif /* CONFIG_XDP_SOCKETS */
+#endif /* !_ICE_XSK_H_ */