Diffstat (limited to 'drivers/net/ethernet/intel/ice')
22 files changed, 18967 insertions, 0 deletions
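The headers introduced by this patch (ice.h and ice_adminq_cmd.h in particular) describe Admin Queue descriptor and VSI context fields almost entirely through paired shift/mask macros, a _S macro for the bit offset and a _M macro for the mask, as in ICE_AQ_VSI_TC_Q_OFFSET_S/_M or ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S/_M. As a minimal standalone sketch of how such fields are packed and unpacked (the DEMO_* names below are invented for illustration and are not part of the driver):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical field layout mirroring the driver's _S/_M convention:
 * bits 10:0 hold a queue offset, bits 14:11 hold a queue count.
 */
#define DEMO_Q_OFFSET_S	0
#define DEMO_Q_OFFSET_M	(0x7FF << DEMO_Q_OFFSET_S)
#define DEMO_Q_NUM_S	11
#define DEMO_Q_NUM_M	(0xF << DEMO_Q_NUM_S)

int main(void)
{
	/* Pack: shift each value into position, then mask off any overflow */
	uint16_t tc_map = ((64 << DEMO_Q_OFFSET_S) & DEMO_Q_OFFSET_M) |
			  ((4 << DEMO_Q_NUM_S) & DEMO_Q_NUM_M);

	/* Unpack: mask first, then shift back down */
	printf("offset=%u num=%u\n",
	       (unsigned int)((tc_map & DEMO_Q_OFFSET_M) >> DEMO_Q_OFFSET_S),
	       (unsigned int)((tc_map & DEMO_Q_NUM_M) >> DEMO_Q_NUM_S));
	return 0;
}

In the actual structures these packed values are stored little-endian (__le16/__le32), so the driver additionally wraps them in cpu_to_le16()/cpu_to_le32() before writing them into a descriptor, as ice_common.c does for the get-PHY-caps report mode below.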
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile new file mode 100644 index 000000000..4058673fd --- /dev/null +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2018, Intel Corporation. + +# +# Makefile for the Intel(R) Ethernet Connection E800 Series Linux Driver +# + +obj-$(CONFIG_ICE) += ice.o + +ice-y := ice_main.o \ + ice_controlq.o \ + ice_common.o \ + ice_nvm.o \ + ice_switch.o \ + ice_sched.o \ + ice_txrx.o \ + ice_ethtool.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h new file mode 100644 index 000000000..67591722c --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -0,0 +1,319 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_H_ +#define _ICE_H_ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/compiler.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/cpumask.h> +#include <linux/rtnetlink.h> +#include <linux/if_vlan.h> +#include <linux/dma-mapping.h> +#include <linux/pci.h> +#include <linux/workqueue.h> +#include <linux/aer.h> +#include <linux/interrupt.h> +#include <linux/ethtool.h> +#include <linux/timer.h> +#include <linux/delay.h> +#include <linux/bitmap.h> +#include <linux/log2.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/if_bridge.h> +#include <net/ipv6.h> +#include "ice_devids.h" +#include "ice_type.h" +#include "ice_txrx.h" +#include "ice_switch.h" +#include "ice_common.h" +#include "ice_sched.h" + +extern const char ice_drv_ver[]; +#define ICE_BAR0 0 +#define ICE_DFLT_NUM_DESC 128 +#define ICE_REQ_DESC_MULTIPLE 32 +#define ICE_MIN_NUM_DESC ICE_REQ_DESC_MULTIPLE +#define ICE_MAX_NUM_DESC 8160 +#define ICE_DFLT_TRAFFIC_CLASS BIT(0) +#define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) +#define ICE_ETHTOOL_FWVER_LEN 32 +#define ICE_AQ_LEN 64 +#define ICE_MIN_MSIX 2 +#define ICE_NO_VSI 0xffff +#define ICE_MAX_VSI_ALLOC 130 +#define ICE_MAX_TXQS 2048 +#define ICE_MAX_RXQS 2048 +#define ICE_VSI_MAP_CONTIG 0 +#define ICE_VSI_MAP_SCATTER 1 +#define ICE_MAX_SCATTER_TXQS 16 +#define ICE_MAX_SCATTER_RXQS 16 +#define ICE_Q_WAIT_RETRY_LIMIT 10 +#define ICE_Q_WAIT_MAX_RETRY (5 * ICE_Q_WAIT_RETRY_LIMIT) +#define ICE_MAX_LG_RSS_QS 256 +#define ICE_MAX_SMALL_RSS_QS 8 +#define ICE_RES_VALID_BIT 0x8000 +#define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) +#define ICE_INVAL_Q_INDEX 0xffff + +#define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) + +#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) + +#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \ + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN) + +#define ICE_UP_TABLE_TRANSLATE(val, i) \ + (((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \ + ICE_AQ_VSI_UP_TABLE_UP##i##_M) + +#define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i])) +#define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i])) +#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i])) + +/* Macro for each VSI in a PF */ +#define ice_for_each_vsi(pf, i) \ + for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++) + +/* Macros for each tx/rx ring in a VSI */ +#define ice_for_each_txq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->num_txq; (i)++) + +#define ice_for_each_rxq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->num_rxq; (i)++) + +/* Macros for each 
allocated tx/rx ring whether used or not in a VSI */ +#define ice_for_each_alloc_txq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++) + +#define ice_for_each_alloc_rxq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++) + +struct ice_tc_info { + u16 qoffset; + u16 qcount; +}; + +struct ice_tc_cfg { + u8 numtc; /* Total number of enabled TCs */ + u8 ena_tc; /* TX map */ + struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS]; +}; + +struct ice_res_tracker { + u16 num_entries; + u16 search_hint; + u16 list[1]; +}; + +struct ice_sw { + struct ice_pf *pf; + u16 sw_id; /* switch ID for this switch */ + u16 bridge_mode; /* VEB/VEPA/Port Virtualizer */ +}; + +enum ice_state { + __ICE_DOWN, + __ICE_NEEDS_RESTART, + __ICE_RESET_RECOVERY_PENDING, /* set by driver when reset starts */ + __ICE_PFR_REQ, /* set by driver and peers */ + __ICE_CORER_REQ, /* set by driver and peers */ + __ICE_GLOBR_REQ, /* set by driver and peers */ + __ICE_CORER_RECV, /* set by OICR handler */ + __ICE_GLOBR_RECV, /* set by OICR handler */ + __ICE_EMPR_RECV, /* set by OICR handler */ + __ICE_SUSPENDED, /* set on module remove path */ + __ICE_RESET_FAILED, /* set by reset/rebuild */ + __ICE_ADMINQ_EVENT_PENDING, + __ICE_FLTR_OVERFLOW_PROMISC, + __ICE_CFG_BUSY, + __ICE_SERVICE_SCHED, + __ICE_STATE_NBITS /* must be last */ +}; + +enum ice_vsi_flags { + ICE_VSI_FLAG_UMAC_FLTR_CHANGED, + ICE_VSI_FLAG_MMAC_FLTR_CHANGED, + ICE_VSI_FLAG_VLAN_FLTR_CHANGED, + ICE_VSI_FLAG_PROMISC_CHANGED, + ICE_VSI_FLAG_NBITS /* must be last */ +}; + +/* struct that defines a VSI, associated with a dev */ +struct ice_vsi { + struct net_device *netdev; + struct ice_sw *vsw; /* switch this VSI is on */ + struct ice_pf *back; /* back pointer to PF */ + struct ice_port_info *port_info; /* back pointer to port_info */ + struct ice_ring **rx_rings; /* rx ring array */ + struct ice_ring **tx_rings; /* tx ring array */ + struct ice_q_vector **q_vectors; /* q_vector array */ + + irqreturn_t (*irq_handler)(int irq, void *data); + + u64 tx_linearize; + DECLARE_BITMAP(state, __ICE_STATE_NBITS); + DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS); + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + unsigned int current_netdev_flags; + u32 tx_restart; + u32 tx_busy; + u32 rx_buf_failed; + u32 rx_page_failed; + int num_q_vectors; + int base_vector; + enum ice_vsi_type type; + u16 vsi_num; /* HW (absolute) index of this VSI */ + u16 idx; /* software index in pf->vsi[] */ + + /* Interrupt thresholds */ + u16 work_lmt; + + /* RSS config */ + u16 rss_table_size; /* HW RSS table size */ + u16 rss_size; /* Allocated RSS queues */ + u8 *rss_hkey_user; /* User configured hash keys */ + u8 *rss_lut_user; /* User configured lookup table entries */ + u8 rss_lut_type; /* used to configure Get/Set RSS LUT AQ call */ + + u16 max_frame; + u16 rx_buf_len; + + struct ice_aqc_vsi_props info; /* VSI properties */ + + /* VSI stats */ + struct rtnl_link_stats64 net_stats; + struct ice_eth_stats eth_stats; + struct ice_eth_stats eth_stats_prev; + + struct list_head tmp_sync_list; /* MAC filters to be synced */ + struct list_head tmp_unsync_list; /* MAC filters to be unsynced */ + + u8 irqs_ready; + u8 current_isup; /* Sync 'link up' logging */ + u8 stat_offsets_loaded; + + /* queue information */ + u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + u8 rx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + u16 txq_map[ICE_MAX_TXQS]; /* index in pf->avail_txqs */ + u16 rxq_map[ICE_MAX_RXQS]; /* index in pf->avail_rxqs */ + u16 alloc_txq; /* Allocated Tx queues */ + u16 
num_txq; /* Used Tx queues */ + u16 alloc_rxq; /* Allocated Rx queues */ + u16 num_rxq; /* Used Rx queues */ + u16 num_desc; + struct ice_tc_cfg tc_cfg; +} ____cacheline_internodealigned_in_smp; + +/* struct that defines an interrupt vector */ +struct ice_q_vector { + struct ice_vsi *vsi; + cpumask_t affinity_mask; + struct napi_struct napi; + struct ice_ring_container rx; + struct ice_ring_container tx; + struct irq_affinity_notify affinity_notify; + u16 v_idx; /* index in the vsi->q_vector array. */ + u8 num_ring_tx; /* total number of tx rings in vector */ + u8 num_ring_rx; /* total number of rx rings in vector */ + char name[ICE_INT_NAME_STR_LEN]; +} ____cacheline_internodealigned_in_smp; + +enum ice_pf_flags { + ICE_FLAG_MSIX_ENA, + ICE_FLAG_FLTR_SYNC, + ICE_FLAG_RSS_ENA, + ICE_PF_FLAGS_NBITS /* must be last */ +}; + +struct ice_pf { + struct pci_dev *pdev; + struct msix_entry *msix_entries; + struct ice_res_tracker *irq_tracker; + struct ice_vsi **vsi; /* VSIs created by the driver */ + struct ice_sw *first_sw; /* first switch created by firmware */ + DECLARE_BITMAP(state, __ICE_STATE_NBITS); + DECLARE_BITMAP(avail_txqs, ICE_MAX_TXQS); + DECLARE_BITMAP(avail_rxqs, ICE_MAX_RXQS); + DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); + unsigned long serv_tmr_period; + unsigned long serv_tmr_prev; + struct timer_list serv_tmr; + struct work_struct serv_task; + struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */ + struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ + u32 msg_enable; + u32 hw_csum_rx_error; + u32 oicr_idx; /* Other interrupt cause vector index */ + u32 num_lan_msix; /* Total MSIX vectors for base driver */ + u32 num_avail_msix; /* remaining MSIX vectors left unclaimed */ + u16 num_lan_tx; /* num lan tx queues setup */ + u16 num_lan_rx; /* num lan rx queues setup */ + u16 q_left_tx; /* remaining num tx queues left unclaimed */ + u16 q_left_rx; /* remaining num rx queues left unclaimed */ + u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */ + u16 num_alloc_vsi; + u16 corer_count; /* Core reset count */ + u16 globr_count; /* Global reset count */ + u16 empr_count; /* EMP reset count */ + u16 pfr_count; /* PF reset count */ + + struct ice_hw_port_stats stats; + struct ice_hw_port_stats stats_prev; + struct ice_hw hw; + u8 stat_prev_loaded; /* has previous stats been loaded */ + char int_name[ICE_INT_NAME_STR_LEN]; +}; + +struct ice_netdev_priv { + struct ice_vsi *vsi; +}; + +/** + * ice_irq_dynamic_ena - Enable default interrupt generation settings + * @hw: pointer to hw struct + * @vsi: pointer to vsi struct, can be NULL + * @q_vector: pointer to q_vector, can be NULL + */ +static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, + struct ice_q_vector *q_vector) +{ + u32 vector = (vsi && q_vector) ? 
vsi->base_vector + q_vector->v_idx : + ((struct ice_pf *)hw->back)->oicr_idx; + int itr = ICE_ITR_NONE; + u32 val; + + /* clear the PBA here, as this function is meant to clean out all + * previous interrupts and enable the interrupt + */ + val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | + (itr << GLINT_DYN_CTL_ITR_INDX_S); + if (vsi) + if (test_bit(__ICE_DOWN, vsi->state)) + return; + wr32(hw, GLINT_DYN_CTL(vector), val); +} + +static inline void ice_vsi_set_tc_cfg(struct ice_vsi *vsi) +{ + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; +} + +void ice_set_ethtool_ops(struct net_device *netdev); +int ice_up(struct ice_vsi *vsi); +int ice_down(struct ice_vsi *vsi); +int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); +void ice_print_link_msg(struct ice_vsi *vsi, bool isup); + +#endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h new file mode 100644 index 000000000..328d293bc --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -0,0 +1,1354 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_ADMINQ_CMD_H_ +#define _ICE_ADMINQ_CMD_H_ + +/* This header file defines the Admin Queue commands, error codes and + * descriptor format. It is shared between Firmware and Software. + */ + +#define ICE_MAX_VSI 768 +#define ICE_AQC_TOPO_MAX_LEVEL_NUM 0x9 +#define ICE_AQ_SET_MAC_FRAME_SIZE_MAX 9728 + +struct ice_aqc_generic { + __le32 param0; + __le32 param1; + __le32 addr_high; + __le32 addr_low; +}; + +/* Get version (direct 0x0001) */ +struct ice_aqc_get_ver { + __le32 rom_ver; + __le32 fw_build; + u8 fw_branch; + u8 fw_major; + u8 fw_minor; + u8 fw_patch; + u8 api_branch; + u8 api_major; + u8 api_minor; + u8 api_patch; +}; + +/* Queue Shutdown (direct 0x0003) */ +struct ice_aqc_q_shutdown { +#define ICE_AQC_DRIVER_UNLOADING BIT(0) + __le32 driver_unloading; + u8 reserved[12]; +}; + +/* Request resource ownership (direct 0x0008) + * Release resource ownership (direct 0x0009) + */ +struct ice_aqc_req_res { + __le16 res_id; +#define ICE_AQC_RES_ID_NVM 1 +#define ICE_AQC_RES_ID_SDP 2 +#define ICE_AQC_RES_ID_CHNG_LOCK 3 +#define ICE_AQC_RES_ID_GLBL_LOCK 4 + __le16 access_type; +#define ICE_AQC_RES_ACCESS_READ 1 +#define ICE_AQC_RES_ACCESS_WRITE 2 + + /* Upon successful completion, FW writes this value and driver is + * expected to release resource before timeout. This value is provided + * in milliseconds. 
+ */ + __le32 timeout; +#define ICE_AQ_RES_NVM_READ_DFLT_TIMEOUT_MS 3000 +#define ICE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS 180000 +#define ICE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS 1000 +#define ICE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS 3000 + /* For SDP: pin id of the SDP */ + __le32 res_number; + /* Status is only used for ICE_AQC_RES_ID_GLBL_LOCK */ + __le16 status; +#define ICE_AQ_RES_GLBL_SUCCESS 0 +#define ICE_AQ_RES_GLBL_IN_PROG 1 +#define ICE_AQ_RES_GLBL_DONE 2 + u8 reserved[2]; +}; + +/* Get function capabilities (indirect 0x000A) + * Get device capabilities (indirect 0x000B) + */ +struct ice_aqc_list_caps { + u8 cmd_flags; + u8 pf_index; + u8 reserved[2]; + __le32 count; + __le32 addr_high; + __le32 addr_low; +}; + +/* Device/Function buffer entry, repeated per reported capability */ +struct ice_aqc_list_caps_elem { + __le16 cap; +#define ICE_AQC_CAPS_VSI 0x0017 +#define ICE_AQC_CAPS_RSS 0x0040 +#define ICE_AQC_CAPS_RXQS 0x0041 +#define ICE_AQC_CAPS_TXQS 0x0042 +#define ICE_AQC_CAPS_MSIX 0x0043 +#define ICE_AQC_CAPS_MAX_MTU 0x0047 + + u8 major_ver; + u8 minor_ver; + /* Number of resources described by this capability */ + __le32 number; + /* Only meaningful for some types of resources */ + __le32 logical_id; + /* Only meaningful for some types of resources */ + __le32 phys_id; + __le64 rsvd1; + __le64 rsvd2; +}; + +/* Manage MAC address, read command - indirect (0x0107) + * This struct is also used for the response + */ +struct ice_aqc_manage_mac_read { + __le16 flags; /* Zeroed by device driver */ +#define ICE_AQC_MAN_MAC_LAN_ADDR_VALID BIT(4) +#define ICE_AQC_MAN_MAC_SAN_ADDR_VALID BIT(5) +#define ICE_AQC_MAN_MAC_PORT_ADDR_VALID BIT(6) +#define ICE_AQC_MAN_MAC_WOL_ADDR_VALID BIT(7) +#define ICE_AQC_MAN_MAC_READ_S 4 +#define ICE_AQC_MAN_MAC_READ_M (0xF << ICE_AQC_MAN_MAC_READ_S) + u8 lport_num; + u8 lport_num_valid; +#define ICE_AQC_MAN_MAC_PORT_NUM_IS_VALID BIT(0) + u8 num_addr; /* Used in response */ + u8 reserved[3]; + __le32 addr_high; + __le32 addr_low; +}; + +/* Response buffer format for manage MAC read command */ +struct ice_aqc_manage_mac_read_resp { + u8 lport_num; + u8 addr_type; +#define ICE_AQC_MAN_MAC_ADDR_TYPE_LAN 0 +#define ICE_AQC_MAN_MAC_ADDR_TYPE_WOL 1 + u8 mac_addr[ETH_ALEN]; +}; + +/* Manage MAC address, write command - direct (0x0108) */ +struct ice_aqc_manage_mac_write { + u8 port_num; + u8 flags; +#define ICE_AQC_MAN_MAC_WR_MC_MAG_EN BIT(0) +#define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP BIT(1) +#define ICE_AQC_MAN_MAC_WR_S 6 +#define ICE_AQC_MAN_MAC_WR_M (3 << ICE_AQC_MAN_MAC_WR_S) +#define ICE_AQC_MAN_MAC_UPDATE_LAA 0 +#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL (BIT(0) << ICE_AQC_MAN_MAC_WR_S) + /* High 16 bits of MAC address in big endian order */ + __be16 sah; + /* Low 32 bits of MAC address in big endian order */ + __be32 sal; + __le32 addr_high; + __le32 addr_low; +}; + +/* Clear PXE Command and response (direct 0x0110) */ +struct ice_aqc_clear_pxe { + u8 rx_cnt; +#define ICE_AQC_CLEAR_PXE_RX_CNT 0x2 + u8 reserved[15]; +}; + +/* Get switch configuration (0x0200) */ +struct ice_aqc_get_sw_cfg { + /* Reserved for command and copy of request flags for response */ + __le16 flags; + /* First desc in case of command and next_elem in case of response + * In case of response, if it is not zero, means all the configuration + * was not returned and new command shall be sent with this value in + * the 'first desc' field + */ + __le16 element; + /* Reserved for command, only used for response */ + __le16 num_elems; + __le16 rsvd; + __le32 addr_high; + __le32 addr_low; +}; + +/* 
Each entry in the response buffer is of the following type: */ +struct ice_aqc_get_sw_cfg_resp_elem { + /* VSI/Port Number */ + __le16 vsi_port_num; +#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S 0 +#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M \ + (0x3FF << ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S) +#define ICE_AQC_GET_SW_CONF_RESP_TYPE_S 14 +#define ICE_AQC_GET_SW_CONF_RESP_TYPE_M (0x3 << ICE_AQC_GET_SW_CONF_RESP_TYPE_S) +#define ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT 0 +#define ICE_AQC_GET_SW_CONF_RESP_VIRT_PORT 1 +#define ICE_AQC_GET_SW_CONF_RESP_VSI 2 + + /* SWID VSI/Port belongs to */ + __le16 swid; + + /* Bit 14..0 : PF/VF number VSI belongs to + * Bit 15 : VF indication bit + */ + __le16 pf_vf_num; +#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S 0 +#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M \ + (0x7FFF << ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S) +#define ICE_AQC_GET_SW_CONF_RESP_IS_VF BIT(15) +}; + +/* The response buffer is as follows. Note that the length of the + * elements array varies with the length of the command response. + */ +struct ice_aqc_get_sw_cfg_resp { + struct ice_aqc_get_sw_cfg_resp_elem elements[1]; +}; + +/* These resource type defines are used for all switch resource + * commands where a resource type is required, such as: + * Get Resource Allocation command (indirect 0x0204) + * Allocate Resources command (indirect 0x0208) + * Free Resources command (indirect 0x0209) + * Get Allocated Resource Descriptors Command (indirect 0x020A) + */ +#define ICE_AQC_RES_TYPE_VSI_LIST_REP 0x03 +#define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE 0x04 + +/* Allocate Resources command (indirect 0x0208) + * Free Resources command (indirect 0x0209) + */ +struct ice_aqc_alloc_free_res_cmd { + __le16 num_entries; /* Number of Resource entries */ + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +/* Resource descriptor */ +struct ice_aqc_res_elem { + union { + __le16 sw_resp; + __le16 flu_resp; + } e; +}; + +/* Buffer for Allocate/Free Resources commands */ +struct ice_aqc_alloc_free_res_elem { + __le16 res_type; /* Types defined above cmd 0x0204 */ +#define ICE_AQC_RES_TYPE_SHARED_S 7 +#define ICE_AQC_RES_TYPE_SHARED_M (0x1 << ICE_AQC_RES_TYPE_SHARED_S) +#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S 8 +#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_M \ + (0xF << ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S) + __le16 num_elems; + struct ice_aqc_res_elem elem[1]; +}; + +/* Add VSI (indirect 0x0210) + * Update VSI (indirect 0x0211) + * Get VSI (indirect 0x0212) + * Free VSI (indirect 0x0213) + */ +struct ice_aqc_add_get_update_free_vsi { + __le16 vsi_num; +#define ICE_AQ_VSI_NUM_S 0 +#define ICE_AQ_VSI_NUM_M (0x03FF << ICE_AQ_VSI_NUM_S) +#define ICE_AQ_VSI_IS_VALID BIT(15) + __le16 cmd_flags; +#define ICE_AQ_VSI_KEEP_ALLOC 0x1 + u8 vf_id; + u8 reserved; + __le16 vsi_flags; +#define ICE_AQ_VSI_TYPE_S 0 +#define ICE_AQ_VSI_TYPE_M (0x3 << ICE_AQ_VSI_TYPE_S) +#define ICE_AQ_VSI_TYPE_VF 0x0 +#define ICE_AQ_VSI_TYPE_VMDQ2 0x1 +#define ICE_AQ_VSI_TYPE_PF 0x2 +#define ICE_AQ_VSI_TYPE_EMP_MNG 0x3 + __le32 addr_high; + __le32 addr_low; +}; + +/* Response descriptor for: + * Add VSI (indirect 0x0210) + * Update VSI (indirect 0x0211) + * Free VSI (indirect 0x0213) + */ +struct ice_aqc_add_update_free_vsi_resp { + __le16 vsi_num; + __le16 ext_status; + __le16 vsi_used; + __le16 vsi_free; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_vsi_props { + __le16 valid_sections; +#define ICE_AQ_VSI_PROP_SW_VALID BIT(0) +#define ICE_AQ_VSI_PROP_SECURITY_VALID BIT(1) +#define ICE_AQ_VSI_PROP_VLAN_VALID BIT(2) +#define 
ICE_AQ_VSI_PROP_OUTER_TAG_VALID BIT(3) +#define ICE_AQ_VSI_PROP_INGRESS_UP_VALID BIT(4) +#define ICE_AQ_VSI_PROP_EGRESS_UP_VALID BIT(5) +#define ICE_AQ_VSI_PROP_RXQ_MAP_VALID BIT(6) +#define ICE_AQ_VSI_PROP_Q_OPT_VALID BIT(7) +#define ICE_AQ_VSI_PROP_OUTER_UP_VALID BIT(8) +#define ICE_AQ_VSI_PROP_FLOW_DIR_VALID BIT(11) +#define ICE_AQ_VSI_PROP_PASID_VALID BIT(12) + /* switch section */ + u8 sw_id; + u8 sw_flags; +#define ICE_AQ_VSI_SW_FLAG_ALLOW_LB BIT(5) +#define ICE_AQ_VSI_SW_FLAG_LOCAL_LB BIT(6) +#define ICE_AQ_VSI_SW_FLAG_SRC_PRUNE BIT(7) + u8 sw_flags2; +#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S 0 +#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M \ + (0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S) +#define ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA BIT(0) +#define ICE_AQ_VSI_SW_FLAG_LAN_ENA BIT(4) + u8 veb_stat_id; +#define ICE_AQ_VSI_SW_VEB_STAT_ID_S 0 +#define ICE_AQ_VSI_SW_VEB_STAT_ID_M (0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S) +#define ICE_AQ_VSI_SW_VEB_STAT_ID_VALID BIT(5) + /* security section */ + u8 sec_flags; +#define ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD BIT(0) +#define ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF BIT(2) +#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S 4 +#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M (0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S) +#define ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA BIT(0) + u8 sec_reserved; + /* VLAN section */ + __le16 pvid; /* VLANS include priority bits */ + u8 pvlan_reserved[2]; + u8 vlan_flags; +#define ICE_AQ_VSI_VLAN_MODE_S 0 +#define ICE_AQ_VSI_VLAN_MODE_M (0x3 << ICE_AQ_VSI_VLAN_MODE_S) +#define ICE_AQ_VSI_VLAN_MODE_UNTAGGED 0x1 +#define ICE_AQ_VSI_VLAN_MODE_TAGGED 0x2 +#define ICE_AQ_VSI_VLAN_MODE_ALL 0x3 +#define ICE_AQ_VSI_PVLAN_INSERT_PVID BIT(2) +#define ICE_AQ_VSI_VLAN_EMOD_S 3 +#define ICE_AQ_VSI_VLAN_EMOD_M (0x3 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR (0x2 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_VLAN_EMOD_S) + u8 pvlan_reserved2[3]; + /* ingress egress up sections */ + __le32 ingress_table; /* bitmap, 3 bits per up */ +#define ICE_AQ_VSI_UP_TABLE_UP0_S 0 +#define ICE_AQ_VSI_UP_TABLE_UP0_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S) +#define ICE_AQ_VSI_UP_TABLE_UP1_S 3 +#define ICE_AQ_VSI_UP_TABLE_UP1_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S) +#define ICE_AQ_VSI_UP_TABLE_UP2_S 6 +#define ICE_AQ_VSI_UP_TABLE_UP2_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S) +#define ICE_AQ_VSI_UP_TABLE_UP3_S 9 +#define ICE_AQ_VSI_UP_TABLE_UP3_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S) +#define ICE_AQ_VSI_UP_TABLE_UP4_S 12 +#define ICE_AQ_VSI_UP_TABLE_UP4_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S) +#define ICE_AQ_VSI_UP_TABLE_UP5_S 15 +#define ICE_AQ_VSI_UP_TABLE_UP5_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S) +#define ICE_AQ_VSI_UP_TABLE_UP6_S 18 +#define ICE_AQ_VSI_UP_TABLE_UP6_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S) +#define ICE_AQ_VSI_UP_TABLE_UP7_S 21 +#define ICE_AQ_VSI_UP_TABLE_UP7_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S) + __le32 egress_table; /* same defines as for ingress table */ + /* outer tags section */ + __le16 outer_tag; + u8 outer_tag_flags; +#define ICE_AQ_VSI_OUTER_TAG_MODE_S 0 +#define ICE_AQ_VSI_OUTER_TAG_MODE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_MODE_S) +#define ICE_AQ_VSI_OUTER_TAG_NOTHING 0x0 +#define ICE_AQ_VSI_OUTER_TAG_REMOVE 0x1 +#define ICE_AQ_VSI_OUTER_TAG_COPY 0x2 +#define ICE_AQ_VSI_OUTER_TAG_TYPE_S 2 +#define ICE_AQ_VSI_OUTER_TAG_TYPE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S) +#define ICE_AQ_VSI_OUTER_TAG_NONE 0x0 
+#define ICE_AQ_VSI_OUTER_TAG_STAG 0x1 +#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100 0x2 +#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100 0x3 +#define ICE_AQ_VSI_OUTER_TAG_INSERT BIT(4) +#define ICE_AQ_VSI_OUTER_TAG_ACCEPT_HOST BIT(6) + u8 outer_tag_reserved; + /* queue mapping section */ + __le16 mapping_flags; +#define ICE_AQ_VSI_Q_MAP_CONTIG 0x0 +#define ICE_AQ_VSI_Q_MAP_NONCONTIG BIT(0) + __le16 q_mapping[16]; +#define ICE_AQ_VSI_Q_S 0 +#define ICE_AQ_VSI_Q_M (0x7FF << ICE_AQ_VSI_Q_S) + __le16 tc_mapping[8]; +#define ICE_AQ_VSI_TC_Q_OFFSET_S 0 +#define ICE_AQ_VSI_TC_Q_OFFSET_M (0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S) +#define ICE_AQ_VSI_TC_Q_NUM_S 11 +#define ICE_AQ_VSI_TC_Q_NUM_M (0xF << ICE_AQ_VSI_TC_Q_NUM_S) + /* queueing option section */ + u8 q_opt_rss; +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S 0 +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI 0x0 +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF 0x2 +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL 0x3 +#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2 +#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S) +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6 +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ (0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ (0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_XOR (0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_JHASH (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) + u8 q_opt_tc; +#define ICE_AQ_VSI_Q_OPT_TC_OVR_S 0 +#define ICE_AQ_VSI_Q_OPT_TC_OVR_M (0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S) +#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR BIT(7) + u8 q_opt_flags; +#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN BIT(0) + u8 q_opt_reserved[3]; + /* outer up section */ + __le32 outer_up_table; /* same structure and defines as ingress tbl */ + /* section 10 */ + __le16 sect_10_reserved; + /* flow director section */ + __le16 fd_options; +#define ICE_AQ_VSI_FD_ENABLE BIT(0) +#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE BIT(1) +#define ICE_AQ_VSI_FD_PROG_ENABLE BIT(3) + __le16 max_fd_fltr_dedicated; + __le16 max_fd_fltr_shared; + __le16 fd_def_q; +#define ICE_AQ_VSI_FD_DEF_Q_S 0 +#define ICE_AQ_VSI_FD_DEF_Q_M (0x7FF << ICE_AQ_VSI_FD_DEF_Q_S) +#define ICE_AQ_VSI_FD_DEF_GRP_S 12 +#define ICE_AQ_VSI_FD_DEF_GRP_M (0x7 << ICE_AQ_VSI_FD_DEF_GRP_S) + __le16 fd_report_opt; +#define ICE_AQ_VSI_FD_REPORT_Q_S 0 +#define ICE_AQ_VSI_FD_REPORT_Q_M (0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S) +#define ICE_AQ_VSI_FD_DEF_PRIORITY_S 12 +#define ICE_AQ_VSI_FD_DEF_PRIORITY_M (0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S) +#define ICE_AQ_VSI_FD_DEF_DROP BIT(15) + /* PASID section */ + __le32 pasid_id; +#define ICE_AQ_VSI_PASID_ID_S 0 +#define ICE_AQ_VSI_PASID_ID_M (0xFFFFF << ICE_AQ_VSI_PASID_ID_S) +#define ICE_AQ_VSI_PASID_ID_VALID BIT(31) + u8 reserved[24]; +}; + +/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3) + */ +struct ice_aqc_sw_rules { + /* ops: add switch rules, referring the number of rules. + * ops: update switch rules, referring the number of filters + * ops: remove switch rules, referring the entry index. + * ops: get switch rules, referring to the number of filters. + */ + __le16 num_rules_fltr_entry_index; + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +/* Add/Update/Get/Remove lookup Rx/Tx command/response entry + * This structures describes the lookup rules and associated actions. 
"index" + * is returned as part of a response to a successful Add command, and can be + * used to identify the rule for Update/Get/Remove commands. + */ +struct ice_sw_rule_lkup_rx_tx { + __le16 recipe_id; +#define ICE_SW_RECIPE_LOGICAL_PORT_FWD 10 + /* Source port for LOOKUP_RX and source VSI in case of LOOKUP_TX */ + __le16 src; + __le32 act; + + /* Bit 0:1 - Action type */ +#define ICE_SINGLE_ACT_TYPE_S 0x00 +#define ICE_SINGLE_ACT_TYPE_M (0x3 << ICE_SINGLE_ACT_TYPE_S) + + /* Bit 2 - Loop back enable + * Bit 3 - LAN enable + */ +#define ICE_SINGLE_ACT_LB_ENABLE BIT(2) +#define ICE_SINGLE_ACT_LAN_ENABLE BIT(3) + + /* Action type = 0 - Forward to VSI or VSI list */ +#define ICE_SINGLE_ACT_VSI_FORWARDING 0x0 + +#define ICE_SINGLE_ACT_VSI_ID_S 4 +#define ICE_SINGLE_ACT_VSI_ID_M (0x3FF << ICE_SINGLE_ACT_VSI_ID_S) +#define ICE_SINGLE_ACT_VSI_LIST_ID_S 4 +#define ICE_SINGLE_ACT_VSI_LIST_ID_M (0x3FF << ICE_SINGLE_ACT_VSI_LIST_ID_S) + /* This bit needs to be set if action is forward to VSI list */ +#define ICE_SINGLE_ACT_VSI_LIST BIT(14) +#define ICE_SINGLE_ACT_VALID_BIT BIT(17) +#define ICE_SINGLE_ACT_DROP BIT(18) + + /* Action type = 1 - Forward to Queue of Queue group */ +#define ICE_SINGLE_ACT_TO_Q 0x1 +#define ICE_SINGLE_ACT_Q_INDEX_S 4 +#define ICE_SINGLE_ACT_Q_INDEX_M (0x7FF << ICE_SINGLE_ACT_Q_INDEX_S) +#define ICE_SINGLE_ACT_Q_REGION_S 15 +#define ICE_SINGLE_ACT_Q_REGION_M (0x7 << ICE_SINGLE_ACT_Q_REGION_S) +#define ICE_SINGLE_ACT_Q_PRIORITY BIT(18) + + /* Action type = 2 - Prune */ +#define ICE_SINGLE_ACT_PRUNE 0x2 +#define ICE_SINGLE_ACT_EGRESS BIT(15) +#define ICE_SINGLE_ACT_INGRESS BIT(16) +#define ICE_SINGLE_ACT_PRUNET BIT(17) + /* Bit 18 should be set to 0 for this action */ + + /* Action type = 2 - Pointer */ +#define ICE_SINGLE_ACT_PTR 0x2 +#define ICE_SINGLE_ACT_PTR_VAL_S 4 +#define ICE_SINGLE_ACT_PTR_VAL_M (0x1FFF << ICE_SINGLE_ACT_PTR_VAL_S) + /* Bit 18 should be set to 1 */ +#define ICE_SINGLE_ACT_PTR_BIT BIT(18) + + /* Action type = 3 - Other actions. Last two bits + * are other action identifier + */ +#define ICE_SINGLE_ACT_OTHER_ACTS 0x3 +#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_S 17 +#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_M \ + (0x3 << \ ICE_SINGLE_OTHER_ACT_IDENTIFIER_S) + + /* Bit 17:18 - Defines other actions */ + /* Other action = 0 - Mirror VSI */ +#define ICE_SINGLE_OTHER_ACT_MIRROR 0 +#define ICE_SINGLE_ACT_MIRROR_VSI_ID_S 4 +#define ICE_SINGLE_ACT_MIRROR_VSI_ID_M \ + (0x3FF << ICE_SINGLE_ACT_MIRROR_VSI_ID_S) + + /* Other action = 3 - Set Stat count */ +#define ICE_SINGLE_OTHER_ACT_STAT_COUNT 3 +#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_S 4 +#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_M \ + (0x7F << ICE_SINGLE_ACT_STAT_COUNT_INDEX_S) + + __le16 index; /* The index of the rule in the lookup table */ + /* Length and values of the header to be matched per recipe or + * lookup-type + */ + __le16 hdr_len; + u8 hdr[1]; +} __packed; + +/* Add/Update/Remove large action command/response entry + * "index" is returned as part of a response to a successful Add command, and + * can be used to identify the action for Update/Get/Remove commands. 
+ */ +struct ice_sw_rule_lg_act { + __le16 index; /* Index in large action table */ + __le16 size; + __le32 act[1]; /* array of size for actions */ + /* Max number of large actions */ +#define ICE_MAX_LG_ACT 4 + /* Bit 0:1 - Action type */ +#define ICE_LG_ACT_TYPE_S 0 +#define ICE_LG_ACT_TYPE_M (0x7 << ICE_LG_ACT_TYPE_S) + + /* Action type = 0 - Forward to VSI or VSI list */ +#define ICE_LG_ACT_VSI_FORWARDING 0 +#define ICE_LG_ACT_VSI_ID_S 3 +#define ICE_LG_ACT_VSI_ID_M (0x3FF << ICE_LG_ACT_VSI_ID_S) +#define ICE_LG_ACT_VSI_LIST_ID_S 3 +#define ICE_LG_ACT_VSI_LIST_ID_M (0x3FF << ICE_LG_ACT_VSI_LIST_ID_S) + /* This bit needs to be set if action is forward to VSI list */ +#define ICE_LG_ACT_VSI_LIST BIT(13) + +#define ICE_LG_ACT_VALID_BIT BIT(16) + + /* Action type = 1 - Forward to Queue of Queue group */ +#define ICE_LG_ACT_TO_Q 0x1 +#define ICE_LG_ACT_Q_INDEX_S 3 +#define ICE_LG_ACT_Q_INDEX_M (0x7FF << ICE_LG_ACT_Q_INDEX_S) +#define ICE_LG_ACT_Q_REGION_S 14 +#define ICE_LG_ACT_Q_REGION_M (0x7 << ICE_LG_ACT_Q_REGION_S) +#define ICE_LG_ACT_Q_PRIORITY_SET BIT(17) + + /* Action type = 2 - Prune */ +#define ICE_LG_ACT_PRUNE 0x2 +#define ICE_LG_ACT_EGRESS BIT(14) +#define ICE_LG_ACT_INGRESS BIT(15) +#define ICE_LG_ACT_PRUNET BIT(16) + + /* Action type = 3 - Mirror VSI */ +#define ICE_LG_OTHER_ACT_MIRROR 0x3 +#define ICE_LG_ACT_MIRROR_VSI_ID_S 3 +#define ICE_LG_ACT_MIRROR_VSI_ID_M (0x3FF << ICE_LG_ACT_MIRROR_VSI_ID_S) + + /* Action type = 5 - Generic Value */ +#define ICE_LG_ACT_GENERIC 0x5 +#define ICE_LG_ACT_GENERIC_VALUE_S 3 +#define ICE_LG_ACT_GENERIC_VALUE_M (0xFFFF << ICE_LG_ACT_GENERIC_VALUE_S) +#define ICE_LG_ACT_GENERIC_OFFSET_S 19 +#define ICE_LG_ACT_GENERIC_OFFSET_M (0x7 << ICE_LG_ACT_GENERIC_OFFSET_S) +#define ICE_LG_ACT_GENERIC_PRIORITY_S 22 +#define ICE_LG_ACT_GENERIC_PRIORITY_M (0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S) +#define ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX 7 + + /* Action = 7 - Set Stat count */ +#define ICE_LG_ACT_STAT_COUNT 0x7 +#define ICE_LG_ACT_STAT_COUNT_S 3 +#define ICE_LG_ACT_STAT_COUNT_M (0x7F << ICE_LG_ACT_STAT_COUNT_S) +}; + +/* Add/Update/Remove VSI list command/response entry + * "index" is returned as part of a response to a successful Add command, and + * can be used to identify the VSI list for Update/Get/Remove commands. + */ +struct ice_sw_rule_vsi_list { + __le16 index; /* Index of VSI/Prune list */ + __le16 number_vsi; + __le16 vsi[1]; /* Array of number_vsi VSI numbers */ +}; + +/* Query VSI list command/response entry */ +struct ice_sw_rule_vsi_list_query { + __le16 index; + DECLARE_BITMAP(vsi_list, ICE_MAX_VSI); +} __packed; + +/* Add switch rule response: + * Content of return buffer is same as the input buffer. The status field and + * LUT index are updated as part of the response + */ +struct ice_aqc_sw_rules_elem { + __le16 type; /* Switch rule type, one of T_... 
*/ +#define ICE_AQC_SW_RULES_T_LKUP_RX 0x0 +#define ICE_AQC_SW_RULES_T_LKUP_TX 0x1 +#define ICE_AQC_SW_RULES_T_LG_ACT 0x2 +#define ICE_AQC_SW_RULES_T_VSI_LIST_SET 0x3 +#define ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR 0x4 +#define ICE_AQC_SW_RULES_T_PRUNE_LIST_SET 0x5 +#define ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR 0x6 + __le16 status; + union { + struct ice_sw_rule_lkup_rx_tx lkup_tx_rx; + struct ice_sw_rule_lg_act lg_act; + struct ice_sw_rule_vsi_list vsi_list; + struct ice_sw_rule_vsi_list_query vsi_list_query; + } __packed pdata; +}; + +/* Get Default Topology (indirect 0x0400) */ +struct ice_aqc_get_topo { + u8 port_num; + u8 num_branches; + __le16 reserved1; + __le32 reserved2; + __le32 addr_high; + __le32 addr_low; +}; + +/* Update TSE (indirect 0x0403) + * Get TSE (indirect 0x0404) + */ +struct ice_aqc_get_cfg_elem { + __le16 num_elem_req; /* Used by commands */ + __le16 num_elem_resp; /* Used by responses */ + __le32 reserved; + __le32 addr_high; + __le32 addr_low; +}; + +/* This is the buffer for: + * Suspend Nodes (indirect 0x0409) + * Resume Nodes (indirect 0x040A) + */ +struct ice_aqc_suspend_resume_elem { + __le32 teid[1]; +}; + +/* Add TSE (indirect 0x0401) + * Delete TSE (indirect 0x040F) + * Move TSE (indirect 0x0408) + */ +struct ice_aqc_add_move_delete_elem { + __le16 num_grps_req; + __le16 num_grps_updated; + __le32 reserved; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_elem_info_bw { + __le16 bw_profile_idx; + __le16 bw_alloc; +}; + +struct ice_aqc_txsched_elem { + u8 elem_type; /* Special field, reserved for some aq calls */ +#define ICE_AQC_ELEM_TYPE_UNDEFINED 0x0 +#define ICE_AQC_ELEM_TYPE_ROOT_PORT 0x1 +#define ICE_AQC_ELEM_TYPE_TC 0x2 +#define ICE_AQC_ELEM_TYPE_SE_GENERIC 0x3 +#define ICE_AQC_ELEM_TYPE_ENTRY_POINT 0x4 +#define ICE_AQC_ELEM_TYPE_LEAF 0x5 +#define ICE_AQC_ELEM_TYPE_SE_PADDED 0x6 + u8 valid_sections; +#define ICE_AQC_ELEM_VALID_GENERIC BIT(0) +#define ICE_AQC_ELEM_VALID_CIR BIT(1) +#define ICE_AQC_ELEM_VALID_EIR BIT(2) +#define ICE_AQC_ELEM_VALID_SHARED BIT(3) + u8 generic; +#define ICE_AQC_ELEM_GENERIC_MODE_M 0x1 +#define ICE_AQC_ELEM_GENERIC_PRIO_S 0x1 +#define ICE_AQC_ELEM_GENERIC_PRIO_M (0x7 << ICE_AQC_ELEM_GENERIC_PRIO_S) +#define ICE_AQC_ELEM_GENERIC_SP_S 0x4 +#define ICE_AQC_ELEM_GENERIC_SP_M (0x1 << ICE_AQC_ELEM_GENERIC_SP_S) +#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S 0x5 +#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_M \ + (0x3 << ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S) + u8 flags; /* Special field, reserved for some aq calls */ +#define ICE_AQC_ELEM_FLAG_SUSPEND_M 0x1 + struct ice_aqc_elem_info_bw cir_bw; + struct ice_aqc_elem_info_bw eir_bw; + __le16 srl_id; + __le16 reserved2; +}; + +struct ice_aqc_txsched_elem_data { + __le32 parent_teid; + __le32 node_teid; + struct ice_aqc_txsched_elem data; +}; + +struct ice_aqc_txsched_topo_grp_info_hdr { + __le32 parent_teid; + __le16 num_elems; + __le16 reserved2; +}; + +struct ice_aqc_add_elem { + struct ice_aqc_txsched_topo_grp_info_hdr hdr; + struct ice_aqc_txsched_elem_data generic[1]; +}; + +struct ice_aqc_get_topo_elem { + struct ice_aqc_txsched_topo_grp_info_hdr hdr; + struct ice_aqc_txsched_elem_data + generic[ICE_AQC_TOPO_MAX_LEVEL_NUM]; +}; + +struct ice_aqc_delete_elem { + struct ice_aqc_txsched_topo_grp_info_hdr hdr; + __le32 teid[1]; +}; + +/* Query Scheduler Resource Allocation (indirect 0x0412) + * This indirect command retrieves the scheduler resources allocated by + * EMP Firmware to the given PF. 
+ */ +struct ice_aqc_query_txsched_res { + u8 reserved[8]; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_generic_sched_props { + __le16 phys_levels; + __le16 logical_levels; + u8 flattening_bitmap; + u8 max_device_cgds; + u8 max_pf_cgds; + u8 rsvd0; + __le16 rdma_qsets; + u8 rsvd1[22]; +}; + +struct ice_aqc_layer_props { + u8 logical_layer; + u8 chunk_size; + __le16 max_device_nodes; + __le16 max_pf_nodes; + u8 rsvd0[2]; + __le16 max_shared_rate_lmtr; + __le16 max_children; + __le16 max_cir_rl_profiles; + __le16 max_eir_rl_profiles; + __le16 max_srl_profiles; + u8 rsvd1[14]; +}; + +struct ice_aqc_query_txsched_res_resp { + struct ice_aqc_generic_sched_props sched_props; + struct ice_aqc_layer_props layer_props[ICE_AQC_TOPO_MAX_LEVEL_NUM]; +}; + +/* Get PHY capabilities (indirect 0x0600) */ +struct ice_aqc_get_phy_caps { + u8 lport_num; + u8 reserved; + __le16 param0; + /* 18.0 - Report qualified modules */ +#define ICE_AQC_GET_PHY_RQM BIT(0) + /* 18.1 - 18.2 : Report mode + * 00b - Report NVM capabilities + * 01b - Report topology capabilities + * 10b - Report SW configured + */ +#define ICE_AQC_REPORT_MODE_S 1 +#define ICE_AQC_REPORT_MODE_M (3 << ICE_AQC_REPORT_MODE_S) +#define ICE_AQC_REPORT_NVM_CAP 0 +#define ICE_AQC_REPORT_TOPO_CAP BIT(1) +#define ICE_AQC_REPORT_SW_CFG BIT(2) + __le32 reserved1; + __le32 addr_high; + __le32 addr_low; +}; + +/* This is #define of PHY type (Extended): + * The first set of defines is for phy_type_low. + */ +#define ICE_PHY_TYPE_LOW_100BASE_TX BIT_ULL(0) +#define ICE_PHY_TYPE_LOW_100M_SGMII BIT_ULL(1) +#define ICE_PHY_TYPE_LOW_1000BASE_T BIT_ULL(2) +#define ICE_PHY_TYPE_LOW_1000BASE_SX BIT_ULL(3) +#define ICE_PHY_TYPE_LOW_1000BASE_LX BIT_ULL(4) +#define ICE_PHY_TYPE_LOW_1000BASE_KX BIT_ULL(5) +#define ICE_PHY_TYPE_LOW_1G_SGMII BIT_ULL(6) +#define ICE_PHY_TYPE_LOW_2500BASE_T BIT_ULL(7) +#define ICE_PHY_TYPE_LOW_2500BASE_X BIT_ULL(8) +#define ICE_PHY_TYPE_LOW_2500BASE_KX BIT_ULL(9) +#define ICE_PHY_TYPE_LOW_5GBASE_T BIT_ULL(10) +#define ICE_PHY_TYPE_LOW_5GBASE_KR BIT_ULL(11) +#define ICE_PHY_TYPE_LOW_10GBASE_T BIT_ULL(12) +#define ICE_PHY_TYPE_LOW_10G_SFI_DA BIT_ULL(13) +#define ICE_PHY_TYPE_LOW_10GBASE_SR BIT_ULL(14) +#define ICE_PHY_TYPE_LOW_10GBASE_LR BIT_ULL(15) +#define ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 BIT_ULL(16) +#define ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC BIT_ULL(17) +#define ICE_PHY_TYPE_LOW_10G_SFI_C2C BIT_ULL(18) +#define ICE_PHY_TYPE_LOW_25GBASE_T BIT_ULL(19) +#define ICE_PHY_TYPE_LOW_25GBASE_CR BIT_ULL(20) +#define ICE_PHY_TYPE_LOW_25GBASE_CR_S BIT_ULL(21) +#define ICE_PHY_TYPE_LOW_25GBASE_CR1 BIT_ULL(22) +#define ICE_PHY_TYPE_LOW_25GBASE_SR BIT_ULL(23) +#define ICE_PHY_TYPE_LOW_25GBASE_LR BIT_ULL(24) +#define ICE_PHY_TYPE_LOW_25GBASE_KR BIT_ULL(25) +#define ICE_PHY_TYPE_LOW_25GBASE_KR_S BIT_ULL(26) +#define ICE_PHY_TYPE_LOW_25GBASE_KR1 BIT_ULL(27) +#define ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC BIT_ULL(28) +#define ICE_PHY_TYPE_LOW_25G_AUI_C2C BIT_ULL(29) +#define ICE_PHY_TYPE_LOW_40GBASE_CR4 BIT_ULL(30) +#define ICE_PHY_TYPE_LOW_40GBASE_SR4 BIT_ULL(31) +#define ICE_PHY_TYPE_LOW_40GBASE_LR4 BIT_ULL(32) +#define ICE_PHY_TYPE_LOW_40GBASE_KR4 BIT_ULL(33) +#define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC BIT_ULL(34) +#define ICE_PHY_TYPE_LOW_40G_XLAUI BIT_ULL(35) +#define ICE_PHY_TYPE_LOW_MAX_INDEX 63 + +struct ice_aqc_get_phy_caps_data { + __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ + __le64 reserved; + u8 caps; +#define ICE_AQC_PHY_EN_TX_LINK_PAUSE BIT(0) +#define ICE_AQC_PHY_EN_RX_LINK_PAUSE BIT(1) +#define 
ICE_AQC_PHY_LOW_POWER_MODE BIT(2) +#define ICE_AQC_PHY_EN_LINK BIT(3) +#define ICE_AQC_PHY_AN_MODE BIT(4) +#define ICE_AQC_GET_PHY_EN_MOD_QUAL BIT(5) + u8 low_power_ctrl; +#define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG BIT(0) + __le16 eee_cap; +#define ICE_AQC_PHY_EEE_EN_100BASE_TX BIT(0) +#define ICE_AQC_PHY_EEE_EN_1000BASE_T BIT(1) +#define ICE_AQC_PHY_EEE_EN_10GBASE_T BIT(2) +#define ICE_AQC_PHY_EEE_EN_1000BASE_KX BIT(3) +#define ICE_AQC_PHY_EEE_EN_10GBASE_KR BIT(4) +#define ICE_AQC_PHY_EEE_EN_25GBASE_KR BIT(5) +#define ICE_AQC_PHY_EEE_EN_40GBASE_KR4 BIT(6) + __le16 eeer_value; + u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */ + u8 link_fec_options; +#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN BIT(0) +#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ BIT(1) +#define ICE_AQC_PHY_FEC_25G_RS_528_REQ BIT(2) +#define ICE_AQC_PHY_FEC_25G_KR_REQ BIT(3) +#define ICE_AQC_PHY_FEC_25G_RS_544_REQ BIT(4) +#define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN BIT(6) +#define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN BIT(7) + u8 extended_compliance_code; +#define ICE_MODULE_TYPE_TOTAL_BYTE 3 + u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE]; +#define ICE_AQC_MOD_TYPE_BYTE0_SFP_PLUS 0xA0 +#define ICE_AQC_MOD_TYPE_BYTE0_QSFP_PLUS 0x80 +#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE BIT(0) +#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE BIT(1) +#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_SR BIT(4) +#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LR BIT(5) +#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LRM BIT(6) +#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_ER BIT(7) +#define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS 0xA0 +#define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS 0x86 + u8 qualified_module_count; +#define ICE_AQC_QUAL_MOD_COUNT_MAX 16 + struct { + u8 v_oui[3]; + u8 rsvd1; + u8 v_part[16]; + __le32 v_rev; + __le64 rsvd8; + } qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX]; +}; + +/* Set PHY capabilities (direct 0x0601) + * NOTE: This command must be followed by setup link and restart auto-neg + */ +struct ice_aqc_set_phy_cfg { + u8 lport_num; + u8 reserved[7]; + __le32 addr_high; + __le32 addr_low; +}; + +/* Set PHY config command data structure */ +struct ice_aqc_set_phy_cfg_data { + __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ + __le64 rsvd0; + u8 caps; +#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY BIT(0) +#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY BIT(1) +#define ICE_AQ_PHY_ENA_LOW_POWER BIT(2) +#define ICE_AQ_PHY_ENA_LINK BIT(3) +#define ICE_AQ_PHY_ENA_ATOMIC_LINK BIT(5) + u8 low_power_ctrl; + __le16 eee_cap; /* Value from ice_aqc_get_phy_caps */ + __le16 eeer_value; + u8 link_fec_opt; /* Use defines from ice_aqc_get_phy_caps */ + u8 rsvd1; +}; + +/* Restart AN command data structure (direct 0x0605) + * Also used for response, with only the lport_num field present. 
+ */ +struct ice_aqc_restart_an { + u8 lport_num; + u8 reserved; + u8 cmd_flags; +#define ICE_AQC_RESTART_AN_LINK_RESTART BIT(1) +#define ICE_AQC_RESTART_AN_LINK_ENABLE BIT(2) + u8 reserved2[13]; +}; + +/* Get link status (indirect 0x0607), also used for Link Status Event */ +struct ice_aqc_get_link_status { + u8 lport_num; + u8 reserved; + __le16 cmd_flags; +#define ICE_AQ_LSE_M 0x3 +#define ICE_AQ_LSE_NOP 0x0 +#define ICE_AQ_LSE_DIS 0x2 +#define ICE_AQ_LSE_ENA 0x3 + /* only response uses this flag */ +#define ICE_AQ_LSE_IS_ENABLED 0x1 + __le32 reserved2; + __le32 addr_high; + __le32 addr_low; +}; + +/* Get link status response data structure, also used for Link Status Event */ +struct ice_aqc_get_link_status_data { + u8 topo_media_conflict; +#define ICE_AQ_LINK_TOPO_CONFLICT BIT(0) +#define ICE_AQ_LINK_MEDIA_CONFLICT BIT(1) +#define ICE_AQ_LINK_TOPO_CORRUPT BIT(2) + u8 reserved1; + u8 link_info; +#define ICE_AQ_LINK_UP BIT(0) /* Link Status */ +#define ICE_AQ_LINK_FAULT BIT(1) +#define ICE_AQ_LINK_FAULT_TX BIT(2) +#define ICE_AQ_LINK_FAULT_RX BIT(3) +#define ICE_AQ_LINK_FAULT_REMOTE BIT(4) +#define ICE_AQ_LINK_UP_PORT BIT(5) /* External Port Link Status */ +#define ICE_AQ_MEDIA_AVAILABLE BIT(6) +#define ICE_AQ_SIGNAL_DETECT BIT(7) + u8 an_info; +#define ICE_AQ_AN_COMPLETED BIT(0) +#define ICE_AQ_LP_AN_ABILITY BIT(1) +#define ICE_AQ_PD_FAULT BIT(2) /* Parallel Detection Fault */ +#define ICE_AQ_FEC_EN BIT(3) +#define ICE_AQ_PHY_LOW_POWER BIT(4) /* Low Power State */ +#define ICE_AQ_LINK_PAUSE_TX BIT(5) +#define ICE_AQ_LINK_PAUSE_RX BIT(6) +#define ICE_AQ_QUALIFIED_MODULE BIT(7) + u8 ext_info; +#define ICE_AQ_LINK_PHY_TEMP_ALARM BIT(0) +#define ICE_AQ_LINK_EXCESSIVE_ERRORS BIT(1) /* Excessive Link Errors */ + /* Port TX Suspended */ +#define ICE_AQ_LINK_TX_S 2 +#define ICE_AQ_LINK_TX_M (0x03 << ICE_AQ_LINK_TX_S) +#define ICE_AQ_LINK_TX_ACTIVE 0 +#define ICE_AQ_LINK_TX_DRAINED 1 +#define ICE_AQ_LINK_TX_FLUSHED 3 + u8 reserved2; + __le16 max_frame_size; + u8 cfg; +#define ICE_AQ_LINK_25G_KR_FEC_EN BIT(0) +#define ICE_AQ_LINK_25G_RS_528_FEC_EN BIT(1) +#define ICE_AQ_LINK_25G_RS_544_FEC_EN BIT(2) + /* Pacing Config */ +#define ICE_AQ_CFG_PACING_S 3 +#define ICE_AQ_CFG_PACING_M (0xF << ICE_AQ_CFG_PACING_S) +#define ICE_AQ_CFG_PACING_TYPE_M BIT(7) +#define ICE_AQ_CFG_PACING_TYPE_AVG 0 +#define ICE_AQ_CFG_PACING_TYPE_FIXED ICE_AQ_CFG_PACING_TYPE_M + /* External Device Power Ability */ + u8 power_desc; +#define ICE_AQ_PWR_CLASS_M 0x3 +#define ICE_AQ_LINK_PWR_BASET_LOW_HIGH 0 +#define ICE_AQ_LINK_PWR_BASET_HIGH 1 +#define ICE_AQ_LINK_PWR_QSFP_CLASS_1 0 +#define ICE_AQ_LINK_PWR_QSFP_CLASS_2 1 +#define ICE_AQ_LINK_PWR_QSFP_CLASS_3 2 +#define ICE_AQ_LINK_PWR_QSFP_CLASS_4 3 + __le16 link_speed; +#define ICE_AQ_LINK_SPEED_10MB BIT(0) +#define ICE_AQ_LINK_SPEED_100MB BIT(1) +#define ICE_AQ_LINK_SPEED_1000MB BIT(2) +#define ICE_AQ_LINK_SPEED_2500MB BIT(3) +#define ICE_AQ_LINK_SPEED_5GB BIT(4) +#define ICE_AQ_LINK_SPEED_10GB BIT(5) +#define ICE_AQ_LINK_SPEED_20GB BIT(6) +#define ICE_AQ_LINK_SPEED_25GB BIT(7) +#define ICE_AQ_LINK_SPEED_40GB BIT(8) +#define ICE_AQ_LINK_SPEED_UNKNOWN BIT(15) + __le32 reserved3; /* Aligns next field to 8-byte boundary */ + __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ + __le64 reserved4; +}; + +/* Set event mask command (direct 0x0613) */ +struct ice_aqc_set_event_mask { + u8 lport_num; + u8 reserved[7]; + __le16 event_mask; +#define ICE_AQ_LINK_EVENT_UPDOWN BIT(1) +#define ICE_AQ_LINK_EVENT_MEDIA_NA BIT(2) +#define ICE_AQ_LINK_EVENT_LINK_FAULT BIT(3) +#define 
ICE_AQ_LINK_EVENT_PHY_TEMP_ALARM BIT(4) +#define ICE_AQ_LINK_EVENT_EXCESSIVE_ERRORS BIT(5) +#define ICE_AQ_LINK_EVENT_SIGNAL_DETECT BIT(6) +#define ICE_AQ_LINK_EVENT_AN_COMPLETED BIT(7) +#define ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL BIT(8) +#define ICE_AQ_LINK_EVENT_PORT_TX_SUSPENDED BIT(9) + u8 reserved1[6]; +}; + +/* NVM Read command (indirect 0x0701) + * NVM Erase commands (direct 0x0702) + * NVM Update commands (indirect 0x0703) + */ +struct ice_aqc_nvm { + __le16 offset_low; + u8 offset_high; + u8 cmd_flags; +#define ICE_AQC_NVM_LAST_CMD BIT(0) +#define ICE_AQC_NVM_PCIR_REQ BIT(0) /* Used by NVM Update reply */ +#define ICE_AQC_NVM_PRESERVATION_S 1 +#define ICE_AQC_NVM_PRESERVATION_M (3 << ICE_AQC_NVM_PRESERVATION_S) +#define ICE_AQC_NVM_NO_PRESERVATION (0 << ICE_AQC_NVM_PRESERVATION_S) +#define ICE_AQC_NVM_PRESERVE_ALL BIT(1) +#define ICE_AQC_NVM_PRESERVE_SELECTED (3 << ICE_AQC_NVM_PRESERVATION_S) +#define ICE_AQC_NVM_FLASH_ONLY BIT(7) + __le16 module_typeid; + __le16 length; +#define ICE_AQC_NVM_ERASE_LEN 0xFFFF + __le32 addr_high; + __le32 addr_low; +}; + +/* Get/Set RSS key (indirect 0x0B04/0x0B02) */ +struct ice_aqc_get_set_rss_key { +#define ICE_AQC_GSET_RSS_KEY_VSI_VALID BIT(15) +#define ICE_AQC_GSET_RSS_KEY_VSI_ID_S 0 +#define ICE_AQC_GSET_RSS_KEY_VSI_ID_M (0x3FF << ICE_AQC_GSET_RSS_KEY_VSI_ID_S) + __le16 vsi_id; + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +#define ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE 0x28 +#define ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE 0xC + +struct ice_aqc_get_set_rss_keys { + u8 standard_rss_key[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE]; + u8 extended_hash_key[ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE]; +}; + +/* Get/Set RSS LUT (indirect 0x0B05/0x0B03) */ +struct ice_aqc_get_set_rss_lut { +#define ICE_AQC_GSET_RSS_LUT_VSI_VALID BIT(15) +#define ICE_AQC_GSET_RSS_LUT_VSI_ID_S 0 +#define ICE_AQC_GSET_RSS_LUT_VSI_ID_M (0x1FF << ICE_AQC_GSET_RSS_LUT_VSI_ID_S) + __le16 vsi_id; +#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S 0 +#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M \ + (0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S) + +#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI 0 +#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF 1 +#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL 2 + +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S 2 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M \ + (0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) + +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128 128 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG 0 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512 512 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG 1 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K 2048 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG 2 + +#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S 4 +#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M \ + (0xF << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S) + + __le16 flags; + __le32 reserved; + __le32 addr_high; + __le32 addr_low; +}; + +/* Add TX LAN Queues (indirect 0x0C30) */ +struct ice_aqc_add_txqs { + u8 num_qgrps; + u8 reserved[3]; + __le32 reserved1; + __le32 addr_high; + __le32 addr_low; +}; + +/* This is the descriptor of each queue entry for the Add TX LAN Queues + * command (0x0C30). Only used within struct ice_aqc_add_tx_qgrp. + */ +struct ice_aqc_add_txqs_perq { + __le16 txq_id; + u8 rsvd[2]; + __le32 q_teid; + u8 txq_ctx[22]; + u8 rsvd2[2]; + struct ice_aqc_txsched_elem info; +}; + +/* The format of the command buffer for Add TX LAN Queues (0x0C30) + * is an array of the following structs. 
Please note that the length of + * each struct ice_aqc_add_tx_qgrp is variable due + * to the variable number of queues in each group! + */ +struct ice_aqc_add_tx_qgrp { + __le32 parent_teid; + u8 num_txqs; + u8 rsvd[3]; + struct ice_aqc_add_txqs_perq txqs[1]; +}; + +/* Disable TX LAN Queues (indirect 0x0C31) */ +struct ice_aqc_dis_txqs { + u8 cmd_type; +#define ICE_AQC_Q_DIS_CMD_S 0 +#define ICE_AQC_Q_DIS_CMD_M (0x3 << ICE_AQC_Q_DIS_CMD_S) +#define ICE_AQC_Q_DIS_CMD_NO_FUNC_RESET (0 << ICE_AQC_Q_DIS_CMD_S) +#define ICE_AQC_Q_DIS_CMD_VM_RESET BIT(ICE_AQC_Q_DIS_CMD_S) +#define ICE_AQC_Q_DIS_CMD_VF_RESET (2 << ICE_AQC_Q_DIS_CMD_S) +#define ICE_AQC_Q_DIS_CMD_PF_RESET (3 << ICE_AQC_Q_DIS_CMD_S) +#define ICE_AQC_Q_DIS_CMD_SUBSEQ_CALL BIT(2) +#define ICE_AQC_Q_DIS_CMD_FLUSH_PIPE BIT(3) + u8 num_entries; + __le16 vmvf_and_timeout; +#define ICE_AQC_Q_DIS_VMVF_NUM_S 0 +#define ICE_AQC_Q_DIS_VMVF_NUM_M (0x3FF << ICE_AQC_Q_DIS_VMVF_NUM_S) +#define ICE_AQC_Q_DIS_TIMEOUT_S 10 +#define ICE_AQC_Q_DIS_TIMEOUT_M (0x3F << ICE_AQC_Q_DIS_TIMEOUT_S) + __le32 blocked_cgds; + __le32 addr_high; + __le32 addr_low; +}; + +/* The buffer for Disable TX LAN Queues (indirect 0x0C31) + * contains the following structures, arrayed one after the + * other. + * Note: Since the q_id is 16 bits wide, if the + * number of queues is even, then 2 bytes of alignment MUST be + * added before the start of the next group, to allow correct + * alignment of the parent_teid field. + */ +struct ice_aqc_dis_txq_item { + __le32 parent_teid; + u8 num_qs; + u8 rsvd; + /* The length of the q_id array varies according to num_qs */ + __le16 q_id[1]; + /* This only applies from F8 onward */ +#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S 15 +#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_LAN_Q \ + (0 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S) +#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET \ + (1 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S) +}; + +struct ice_aqc_dis_txq { + struct ice_aqc_dis_txq_item qgrps[1]; +}; + +/** + * struct ice_aq_desc - Admin Queue (AQ) descriptor + * @flags: ICE_AQ_FLAG_* flags + * @opcode: AQ command opcode + * @datalen: length in bytes of indirect/external data buffer + * @retval: return value from firmware + * @cookie_h: opaque data high-half + * @cookie_l: opaque data low-half + * @params: command-specific parameters + * + * Descriptor format for commands the driver posts on the Admin Transmit Queue + * (ATQ). The firmware writes back onto the command descriptor and returns + * the result of the command. Asynchronous events that are not an immediate + * result of the command are written to the Admin Receive Queue (ARQ) using + * the same descriptor format. Descriptors are in little-endian notation with + * 32-bit words. 
+ */ +struct ice_aq_desc { + __le16 flags; + __le16 opcode; + __le16 datalen; + __le16 retval; + __le32 cookie_high; + __le32 cookie_low; + union { + u8 raw[16]; + struct ice_aqc_generic generic; + struct ice_aqc_get_ver get_ver; + struct ice_aqc_q_shutdown q_shutdown; + struct ice_aqc_req_res res_owner; + struct ice_aqc_manage_mac_read mac_read; + struct ice_aqc_manage_mac_write mac_write; + struct ice_aqc_clear_pxe clear_pxe; + struct ice_aqc_list_caps get_cap; + struct ice_aqc_get_phy_caps get_phy; + struct ice_aqc_set_phy_cfg set_phy; + struct ice_aqc_restart_an restart_an; + struct ice_aqc_get_sw_cfg get_sw_conf; + struct ice_aqc_sw_rules sw_rules; + struct ice_aqc_get_topo get_topo; + struct ice_aqc_get_cfg_elem get_update_elem; + struct ice_aqc_query_txsched_res query_sched_res; + struct ice_aqc_add_move_delete_elem add_move_delete_elem; + struct ice_aqc_nvm nvm; + struct ice_aqc_get_set_rss_lut get_set_rss_lut; + struct ice_aqc_get_set_rss_key get_set_rss_key; + struct ice_aqc_add_txqs add_txqs; + struct ice_aqc_dis_txqs dis_txqs; + struct ice_aqc_add_get_update_free_vsi vsi_cmd; + struct ice_aqc_alloc_free_res_cmd sw_res_ctrl; + struct ice_aqc_set_event_mask set_event_mask; + struct ice_aqc_get_link_status get_link_status; + } params; +}; + +/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */ +#define ICE_AQ_LG_BUF 512 + +#define ICE_AQ_FLAG_ERR_S 2 +#define ICE_AQ_FLAG_LB_S 9 +#define ICE_AQ_FLAG_RD_S 10 +#define ICE_AQ_FLAG_BUF_S 12 +#define ICE_AQ_FLAG_SI_S 13 + +#define ICE_AQ_FLAG_ERR BIT(ICE_AQ_FLAG_ERR_S) /* 0x4 */ +#define ICE_AQ_FLAG_LB BIT(ICE_AQ_FLAG_LB_S) /* 0x200 */ +#define ICE_AQ_FLAG_RD BIT(ICE_AQ_FLAG_RD_S) /* 0x400 */ +#define ICE_AQ_FLAG_BUF BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */ +#define ICE_AQ_FLAG_SI BIT(ICE_AQ_FLAG_SI_S) /* 0x2000 */ + +/* error codes */ +enum ice_aq_err { + ICE_AQ_RC_OK = 0, /* success */ + ICE_AQ_RC_ENOMEM = 9, /* Out of memory */ + ICE_AQ_RC_EBUSY = 12, /* Device or resource busy */ + ICE_AQ_RC_EEXIST = 13, /* object already exists */ + ICE_AQ_RC_ENOSPC = 16, /* No space left or allocation failure */ +}; + +/* Admin Queue command opcodes */ +enum ice_adminq_opc { + /* AQ commands */ + ice_aqc_opc_get_ver = 0x0001, + ice_aqc_opc_q_shutdown = 0x0003, + + /* resource ownership */ + ice_aqc_opc_req_res = 0x0008, + ice_aqc_opc_release_res = 0x0009, + + /* device/function capabilities */ + ice_aqc_opc_list_func_caps = 0x000A, + ice_aqc_opc_list_dev_caps = 0x000B, + + /* manage MAC address */ + ice_aqc_opc_manage_mac_read = 0x0107, + ice_aqc_opc_manage_mac_write = 0x0108, + + /* PXE */ + ice_aqc_opc_clear_pxe_mode = 0x0110, + + /* internal switch commands */ + ice_aqc_opc_get_sw_cfg = 0x0200, + + /* Alloc/Free/Get Resources */ + ice_aqc_opc_alloc_res = 0x0208, + ice_aqc_opc_free_res = 0x0209, + + /* VSI commands */ + ice_aqc_opc_add_vsi = 0x0210, + ice_aqc_opc_update_vsi = 0x0211, + ice_aqc_opc_free_vsi = 0x0213, + + /* switch rules population commands */ + ice_aqc_opc_add_sw_rules = 0x02A0, + ice_aqc_opc_update_sw_rules = 0x02A1, + ice_aqc_opc_remove_sw_rules = 0x02A2, + + ice_aqc_opc_clear_pf_cfg = 0x02A4, + + /* transmit scheduler commands */ + ice_aqc_opc_get_dflt_topo = 0x0400, + ice_aqc_opc_add_sched_elems = 0x0401, + ice_aqc_opc_suspend_sched_elems = 0x0409, + ice_aqc_opc_resume_sched_elems = 0x040A, + ice_aqc_opc_delete_sched_elems = 0x040F, + ice_aqc_opc_query_sched_res = 0x0412, + + /* PHY commands */ + ice_aqc_opc_get_phy_caps = 0x0600, + ice_aqc_opc_set_phy_cfg = 0x0601, + ice_aqc_opc_restart_an = 0x0605, + 
ice_aqc_opc_get_link_status = 0x0607, + ice_aqc_opc_set_event_mask = 0x0613, + + /* NVM commands */ + ice_aqc_opc_nvm_read = 0x0701, + + /* RSS commands */ + ice_aqc_opc_set_rss_key = 0x0B02, + ice_aqc_opc_set_rss_lut = 0x0B03, + ice_aqc_opc_get_rss_key = 0x0B04, + ice_aqc_opc_get_rss_lut = 0x0B05, + + /* TX queue handling commands/events */ + ice_aqc_opc_add_txqs = 0x0C30, + ice_aqc_opc_dis_txqs = 0x0C31, +}; + +#endif /* _ICE_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c new file mode 100644 index 000000000..f8d00263d --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -0,0 +1,2294 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_common.h" +#include "ice_sched.h" +#include "ice_adminq_cmd.h" + +#define ICE_PF_RESET_WAIT_COUNT 200 + +#define ICE_NIC_FLX_ENTRY(hw, mdid, idx) \ + wr32((hw), GLFLXP_RXDID_FLX_WRD_##idx(ICE_RXDID_FLEX_NIC), \ + ((ICE_RX_OPC_MDID << \ + GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_S) & \ + GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_M) | \ + (((mdid) << GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_S) & \ + GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_M)) + +#define ICE_NIC_FLX_FLG_ENTRY(hw, flg_0, flg_1, flg_2, flg_3, idx) \ + wr32((hw), GLFLXP_RXDID_FLAGS(ICE_RXDID_FLEX_NIC, idx), \ + (((flg_0) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) & \ + GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) | \ + (((flg_1) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) & \ + GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M) | \ + (((flg_2) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S) & \ + GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M) | \ + (((flg_3) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S) & \ + GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M)) + +/** + * ice_set_mac_type - Sets MAC type + * @hw: pointer to the HW structure + * + * This function sets the MAC type of the adapter based on the + * vendor ID and device ID stored in the hw structure. + */ +static enum ice_status ice_set_mac_type(struct ice_hw *hw) +{ + if (hw->vendor_id != PCI_VENDOR_ID_INTEL) + return ICE_ERR_DEVICE_NOT_SUPPORTED; + + hw->mac_type = ICE_MAC_GENERIC; + return 0; +} + +/** + * ice_clear_pf_cfg - Clear PF configuration + * @hw: pointer to the hardware structure + * + * Clears any existing PF configuration (VSIs, VSI lists, switch rules, port + * configuration, flow director filters, etc.). + */ +enum ice_status ice_clear_pf_cfg(struct ice_hw *hw) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pf_cfg); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + +/** + * ice_aq_manage_mac_read - manage MAC address read command + * @hw: pointer to the hw struct + * @buf: a virtual buffer to hold the manage MAC read response + * @buf_size: Size of the virtual buffer + * @cd: pointer to command details structure or NULL + * + * This function is used to return per PF station MAC address (0x0107). + * NOTE: Upon successful completion of this command, MAC address information + * is returned in user specified buffer. Please interpret user specified + * buffer as "manage_mac_read" response. + * Response such as various MAC addresses are stored in HW struct (port.mac) + * ice_aq_discover_caps is expected to be called before this function is called. 
+ */ +static enum ice_status +ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_aqc_manage_mac_read_resp *resp; + struct ice_aqc_manage_mac_read *cmd; + struct ice_aq_desc desc; + enum ice_status status; + u16 flags; + u8 i; + + cmd = &desc.params.mac_read; + + if (buf_size < sizeof(*resp)) + return ICE_ERR_BUF_TOO_SHORT; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_read); + + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (status) + return status; + + resp = (struct ice_aqc_manage_mac_read_resp *)buf; + flags = le16_to_cpu(cmd->flags) & ICE_AQC_MAN_MAC_READ_M; + + if (!(flags & ICE_AQC_MAN_MAC_LAN_ADDR_VALID)) { + ice_debug(hw, ICE_DBG_LAN, "got invalid MAC address\n"); + return ICE_ERR_CFG; + } + + /* A single port can report up to two (LAN and WoL) addresses */ + for (i = 0; i < cmd->num_addr; i++) + if (resp[i].addr_type == ICE_AQC_MAN_MAC_ADDR_TYPE_LAN) { + ether_addr_copy(hw->port_info->mac.lan_addr, + resp[i].mac_addr); + ether_addr_copy(hw->port_info->mac.perm_addr, + resp[i].mac_addr); + break; + } + + return 0; +} + +/** + * ice_aq_get_phy_caps - returns PHY capabilities + * @pi: port information structure + * @qual_mods: report qualified modules + * @report_mode: report mode capabilities + * @pcaps: structure for PHY capabilities to be filled + * @cd: pointer to command details structure or NULL + * + * Returns the various PHY capabilities supported on the Port (0x0600) + */ +static enum ice_status +ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, + struct ice_aqc_get_phy_caps_data *pcaps, + struct ice_sq_cd *cd) +{ + struct ice_aqc_get_phy_caps *cmd; + u16 pcaps_size = sizeof(*pcaps); + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.get_phy; + + if (!pcaps || (report_mode & ~ICE_AQC_REPORT_MODE_M) || !pi) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps); + + if (qual_mods) + cmd->param0 |= cpu_to_le16(ICE_AQC_GET_PHY_RQM); + + cmd->param0 |= cpu_to_le16(report_mode); + status = ice_aq_send_cmd(pi->hw, &desc, pcaps, pcaps_size, cd); + + if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) + pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low); + + return status; +} + +/** + * ice_get_media_type - Gets media type + * @pi: port information structure + */ +static enum ice_media_type ice_get_media_type(struct ice_port_info *pi) +{ + struct ice_link_status *hw_link_info; + + if (!pi) + return ICE_MEDIA_UNKNOWN; + + hw_link_info = &pi->phy.link_info; + + if (hw_link_info->phy_type_low) { + switch (hw_link_info->phy_type_low) { + case ICE_PHY_TYPE_LOW_1000BASE_SX: + case ICE_PHY_TYPE_LOW_1000BASE_LX: + case ICE_PHY_TYPE_LOW_10GBASE_SR: + case ICE_PHY_TYPE_LOW_10GBASE_LR: + case ICE_PHY_TYPE_LOW_10G_SFI_C2C: + case ICE_PHY_TYPE_LOW_25GBASE_SR: + case ICE_PHY_TYPE_LOW_25GBASE_LR: + case ICE_PHY_TYPE_LOW_25G_AUI_C2C: + case ICE_PHY_TYPE_LOW_40GBASE_SR4: + case ICE_PHY_TYPE_LOW_40GBASE_LR4: + return ICE_MEDIA_FIBER; + case ICE_PHY_TYPE_LOW_100BASE_TX: + case ICE_PHY_TYPE_LOW_1000BASE_T: + case ICE_PHY_TYPE_LOW_2500BASE_T: + case ICE_PHY_TYPE_LOW_5GBASE_T: + case ICE_PHY_TYPE_LOW_10GBASE_T: + case ICE_PHY_TYPE_LOW_25GBASE_T: + return ICE_MEDIA_BASET; + case ICE_PHY_TYPE_LOW_10G_SFI_DA: + case ICE_PHY_TYPE_LOW_25GBASE_CR: + case ICE_PHY_TYPE_LOW_25GBASE_CR_S: + case ICE_PHY_TYPE_LOW_25GBASE_CR1: + case ICE_PHY_TYPE_LOW_40GBASE_CR4: + return ICE_MEDIA_DA; + case ICE_PHY_TYPE_LOW_1000BASE_KX: + case 
ICE_PHY_TYPE_LOW_2500BASE_KX: + case ICE_PHY_TYPE_LOW_2500BASE_X: + case ICE_PHY_TYPE_LOW_5GBASE_KR: + case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1: + case ICE_PHY_TYPE_LOW_25GBASE_KR: + case ICE_PHY_TYPE_LOW_25GBASE_KR1: + case ICE_PHY_TYPE_LOW_25GBASE_KR_S: + case ICE_PHY_TYPE_LOW_40GBASE_KR4: + return ICE_MEDIA_BACKPLANE; + } + } + + return ICE_MEDIA_UNKNOWN; +} + +/** + * ice_aq_get_link_info + * @pi: port information structure + * @ena_lse: enable/disable LinkStatusEvent reporting + * @link: pointer to link status structure - optional + * @cd: pointer to command details structure or NULL + * + * Get Link Status (0x607). Returns the link status of the adapter. + */ +enum ice_status +ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, + struct ice_link_status *link, struct ice_sq_cd *cd) +{ + struct ice_link_status *hw_link_info_old, *hw_link_info; + struct ice_aqc_get_link_status_data link_data = { 0 }; + struct ice_aqc_get_link_status *resp; + enum ice_media_type *hw_media_type; + struct ice_fc_info *hw_fc_info; + bool tx_pause, rx_pause; + struct ice_aq_desc desc; + enum ice_status status; + u16 cmd_flags; + + if (!pi) + return ICE_ERR_PARAM; + hw_link_info_old = &pi->phy.link_info_old; + hw_media_type = &pi->phy.media_type; + hw_link_info = &pi->phy.link_info; + hw_fc_info = &pi->fc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status); + cmd_flags = (ena_lse) ? ICE_AQ_LSE_ENA : ICE_AQ_LSE_DIS; + resp = &desc.params.get_link_status; + resp->cmd_flags = cpu_to_le16(cmd_flags); + resp->lport_num = pi->lport; + + status = ice_aq_send_cmd(pi->hw, &desc, &link_data, sizeof(link_data), + cd); + + if (status) + return status; + + /* save off old link status information */ + *hw_link_info_old = *hw_link_info; + + /* update current link status information */ + hw_link_info->link_speed = le16_to_cpu(link_data.link_speed); + hw_link_info->phy_type_low = le64_to_cpu(link_data.phy_type_low); + *hw_media_type = ice_get_media_type(pi); + hw_link_info->link_info = link_data.link_info; + hw_link_info->an_info = link_data.an_info; + hw_link_info->ext_info = link_data.ext_info; + hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size); + hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M; + + /* update fc info */ + tx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_TX); + rx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_RX); + if (tx_pause && rx_pause) + hw_fc_info->current_mode = ICE_FC_FULL; + else if (tx_pause) + hw_fc_info->current_mode = ICE_FC_TX_PAUSE; + else if (rx_pause) + hw_fc_info->current_mode = ICE_FC_RX_PAUSE; + else + hw_fc_info->current_mode = ICE_FC_NONE; + + hw_link_info->lse_ena = + !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED)); + + /* save link status information */ + if (link) + *link = *hw_link_info; + + /* flag cleared so calling functions don't call AQ again */ + pi->phy.get_link_info = false; + + return status; +} + +/** + * ice_init_flex_parser - initialize rx flex parser + * @hw: pointer to the hardware structure + * + * Function to initialize flex descriptors + */ +static void ice_init_flex_parser(struct ice_hw *hw) +{ + u8 idx = 0; + + ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_HASH_LOW, 0); + ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_HASH_HIGH, 1); + ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_FLOW_ID_LOWER, 2); + ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_FLOW_ID_HIGH, 3); + ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_PKT_FRG, ICE_RXFLG_UDP_GRE, + ICE_RXFLG_PKT_DSI, ICE_RXFLG_FIN, idx++); + ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_SYN, ICE_RXFLG_RST, + 
ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx++); + ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, + ICE_RXFLG_EVLAN_x8100, ICE_RXFLG_EVLAN_x9100, + idx++); + ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_VLAN_x8100, ICE_RXFLG_TNL_VLAN, + ICE_RXFLG_TNL_MAC, ICE_RXFLG_TNL0, idx++); + ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_TNL1, ICE_RXFLG_TNL2, + ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx); +} + +/** + * ice_init_fltr_mgmt_struct - initializes filter management list and locks + * @hw: pointer to the hw struct + */ +static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw) +{ + struct ice_switch_info *sw; + + hw->switch_info = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*hw->switch_info), GFP_KERNEL); + sw = hw->switch_info; + + if (!sw) + return ICE_ERR_NO_MEMORY; + + INIT_LIST_HEAD(&sw->vsi_list_map_head); + + mutex_init(&sw->mac_list_lock); + INIT_LIST_HEAD(&sw->mac_list_head); + + mutex_init(&sw->vlan_list_lock); + INIT_LIST_HEAD(&sw->vlan_list_head); + + mutex_init(&sw->eth_m_list_lock); + INIT_LIST_HEAD(&sw->eth_m_list_head); + + mutex_init(&sw->promisc_list_lock); + INIT_LIST_HEAD(&sw->promisc_list_head); + + mutex_init(&sw->mac_vlan_list_lock); + INIT_LIST_HEAD(&sw->mac_vlan_list_head); + + return 0; +} + +/** + * ice_cleanup_fltr_mgmt_struct - cleanup filter management list and locks + * @hw: pointer to the hw struct + */ +static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_vsi_list_map_info *v_pos_map; + struct ice_vsi_list_map_info *v_tmp_map; + + list_for_each_entry_safe(v_pos_map, v_tmp_map, &sw->vsi_list_map_head, + list_entry) { + list_del(&v_pos_map->list_entry); + devm_kfree(ice_hw_to_dev(hw), v_pos_map); + } + + mutex_destroy(&sw->mac_list_lock); + mutex_destroy(&sw->vlan_list_lock); + mutex_destroy(&sw->eth_m_list_lock); + mutex_destroy(&sw->promisc_list_lock); + mutex_destroy(&sw->mac_vlan_list_lock); + + devm_kfree(ice_hw_to_dev(hw), sw); +} + +/** + * ice_init_hw - main hardware initialization routine + * @hw: pointer to the hardware structure + */ +enum ice_status ice_init_hw(struct ice_hw *hw) +{ + struct ice_aqc_get_phy_caps_data *pcaps; + enum ice_status status; + u16 mac_buf_len; + void *mac_buf; + + /* Set MAC type based on DeviceID */ + status = ice_set_mac_type(hw); + if (status) + return status; + + hw->pf_id = (u8)(rd32(hw, PF_FUNC_RID) & + PF_FUNC_RID_FUNC_NUM_M) >> + PF_FUNC_RID_FUNC_NUM_S; + + status = ice_reset(hw, ICE_RESET_PFR); + if (status) + return status; + + /* set these values to minimum allowed */ + hw->itr_gran_200 = ICE_ITR_GRAN_MIN_200; + hw->itr_gran_100 = ICE_ITR_GRAN_MIN_100; + hw->itr_gran_50 = ICE_ITR_GRAN_MIN_50; + hw->itr_gran_25 = ICE_ITR_GRAN_MIN_25; + + status = ice_init_all_ctrlq(hw); + if (status) + goto err_unroll_cqinit; + + status = ice_clear_pf_cfg(hw); + if (status) + goto err_unroll_cqinit; + + ice_clear_pxe_mode(hw); + + status = ice_init_nvm(hw); + if (status) + goto err_unroll_cqinit; + + status = ice_get_caps(hw); + if (status) + goto err_unroll_cqinit; + + hw->port_info = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*hw->port_info), GFP_KERNEL); + if (!hw->port_info) { + status = ICE_ERR_NO_MEMORY; + goto err_unroll_cqinit; + } + + /* set the back pointer to hw */ + hw->port_info->hw = hw; + + /* Initialize port_info struct with switch configuration data */ + status = ice_get_initial_sw_cfg(hw); + if (status) + goto err_unroll_alloc; + + hw->evb_veb = true; + + /* Query the allocated resources for tx scheduler */ + status = 
ice_sched_query_res_alloc(hw); + if (status) { + ice_debug(hw, ICE_DBG_SCHED, + "Failed to get scheduler allocated resources\n"); + goto err_unroll_alloc; + } + + /* Initialize port_info struct with scheduler data */ + status = ice_sched_init_port(hw->port_info); + if (status) + goto err_unroll_sched; + + pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL); + if (!pcaps) { + status = ICE_ERR_NO_MEMORY; + goto err_unroll_sched; + } + + /* Initialize port_info struct with PHY capabilities */ + status = ice_aq_get_phy_caps(hw->port_info, false, + ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL); + devm_kfree(ice_hw_to_dev(hw), pcaps); + if (status) + goto err_unroll_sched; + + /* Initialize port_info struct with link information */ + status = ice_aq_get_link_info(hw->port_info, false, NULL, NULL); + if (status) + goto err_unroll_sched; + + status = ice_init_fltr_mgmt_struct(hw); + if (status) + goto err_unroll_sched; + + /* Get MAC information */ + /* A single port can report up to two (LAN and WoL) addresses */ + mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2, + sizeof(struct ice_aqc_manage_mac_read_resp), + GFP_KERNEL); + mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp); + + if (!mac_buf) { + status = ICE_ERR_NO_MEMORY; + goto err_unroll_fltr_mgmt_struct; + } + + status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL); + devm_kfree(ice_hw_to_dev(hw), mac_buf); + + if (status) + goto err_unroll_fltr_mgmt_struct; + + ice_init_flex_parser(hw); + + return 0; + +err_unroll_fltr_mgmt_struct: + ice_cleanup_fltr_mgmt_struct(hw); +err_unroll_sched: + ice_sched_cleanup_all(hw); +err_unroll_alloc: + devm_kfree(ice_hw_to_dev(hw), hw->port_info); +err_unroll_cqinit: + ice_shutdown_all_ctrlq(hw); + return status; +} + +/** + * ice_deinit_hw - unroll initialization operations done by ice_init_hw + * @hw: pointer to the hardware structure + */ +void ice_deinit_hw(struct ice_hw *hw) +{ + ice_sched_cleanup_all(hw); + ice_shutdown_all_ctrlq(hw); + + if (hw->port_info) { + devm_kfree(ice_hw_to_dev(hw), hw->port_info); + hw->port_info = NULL; + } + + ice_cleanup_fltr_mgmt_struct(hw); +} + +/** + * ice_check_reset - Check to see if a global reset is complete + * @hw: pointer to the hardware structure + */ +enum ice_status ice_check_reset(struct ice_hw *hw) +{ + u32 cnt, reg = 0, grst_delay; + + /* Poll for Device Active state in case a recent CORER, GLOBR, + * or EMPR has occurred. The grst delay value is in 100ms units. + * Add 1sec for outstanding AQ commands that can take a long time. + */ + grst_delay = ((rd32(hw, GLGEN_RSTCTL) & GLGEN_RSTCTL_GRSTDEL_M) >> + GLGEN_RSTCTL_GRSTDEL_S) + 10; + + for (cnt = 0; cnt < grst_delay; cnt++) { + mdelay(100); + reg = rd32(hw, GLGEN_RSTAT); + if (!(reg & GLGEN_RSTAT_DEVSTATE_M)) + break; + } + + if (cnt == grst_delay) { + ice_debug(hw, ICE_DBG_INIT, + "Global reset polling failed to complete.\n"); + return ICE_ERR_RESET_FAILED; + } + +#define ICE_RESET_DONE_MASK (GLNVM_ULD_CORER_DONE_M | \ + GLNVM_ULD_GLOBR_DONE_M) + + /* Device is Active; check Global Reset processes are done */ + for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) { + reg = rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK; + if (reg == ICE_RESET_DONE_MASK) { + ice_debug(hw, ICE_DBG_INIT, + "Global reset processes done. %d\n", cnt); + break; + } + mdelay(10); + } + + if (cnt == ICE_PF_RESET_WAIT_COUNT) { + ice_debug(hw, ICE_DBG_INIT, + "Wait for Reset Done timed out. 
GLNVM_ULD = 0x%x\n", + reg); + return ICE_ERR_RESET_FAILED; + } + + return 0; +} + +/** + * ice_pf_reset - Reset the PF + * @hw: pointer to the hardware structure + * + * If a global reset has been triggered, this function checks + * for its completion and then issues the PF reset + */ +static enum ice_status ice_pf_reset(struct ice_hw *hw) +{ + u32 cnt, reg; + + /* If at function entry a global reset was already in progress, i.e. + * state is not 'device active' or any of the reset done bits are not + * set in GLNVM_ULD, there is no need for a PF Reset; poll until the + * global reset is done. + */ + if ((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) || + (rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK) ^ ICE_RESET_DONE_MASK) { + /* poll on global reset currently in progress until done */ + if (ice_check_reset(hw)) + return ICE_ERR_RESET_FAILED; + + return 0; + } + + /* Reset the PF */ + reg = rd32(hw, PFGEN_CTRL); + + wr32(hw, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR_M)); + + for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) { + reg = rd32(hw, PFGEN_CTRL); + if (!(reg & PFGEN_CTRL_PFSWR_M)) + break; + + mdelay(1); + } + + if (cnt == ICE_PF_RESET_WAIT_COUNT) { + ice_debug(hw, ICE_DBG_INIT, + "PF reset polling failed to complete.\n"); + return ICE_ERR_RESET_FAILED; + } + + return 0; +} + +/** + * ice_reset - Perform different types of reset + * @hw: pointer to the hardware structure + * @req: reset request + * + * This function triggers a reset as specified by the req parameter. + * + * Note: + * If anything other than a PF reset is triggered, PXE mode is restored. + * This has to be cleared using ice_clear_pxe_mode again, once the AQ + * interface has been restored in the rebuild flow. + */ +enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req) +{ + u32 val = 0; + + switch (req) { + case ICE_RESET_PFR: + return ice_pf_reset(hw); + case ICE_RESET_CORER: + ice_debug(hw, ICE_DBG_INIT, "CoreR requested\n"); + val = GLGEN_RTRIG_CORER_M; + break; + case ICE_RESET_GLOBR: + ice_debug(hw, ICE_DBG_INIT, "GlobalR requested\n"); + val = GLGEN_RTRIG_GLOBR_M; + break; + } + + val |= rd32(hw, GLGEN_RTRIG); + wr32(hw, GLGEN_RTRIG, val); + ice_flush(hw); + + /* wait for the FW to be ready */ + return ice_check_reset(hw); +} + +/** + * ice_copy_rxq_ctx_to_hw + * @hw: pointer to the hardware structure + * @ice_rxq_ctx: pointer to the rxq context + * @rxq_index: the index of the rx queue + * + * Copies rxq context from dense structure to hw register space + */ +static enum ice_status +ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index) +{ + u8 i; + + if (!ice_rxq_ctx) + return ICE_ERR_BAD_PTR; + + if (rxq_index > QRX_CTRL_MAX_INDEX) + return ICE_ERR_PARAM; + + /* Copy each dword separately to hw */ + for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) { + wr32(hw, QRX_CONTEXT(i, rxq_index), + *((u32 *)(ice_rxq_ctx + (i * sizeof(u32))))); + + ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i, + *((u32 *)(ice_rxq_ctx + (i * sizeof(u32))))); + } + + return 0; +} + +/* LAN Rx Queue Context */ +static const struct ice_ctx_ele ice_rlan_ctx_info[] = { + /* Field Width LSB */ + ICE_CTX_STORE(ice_rlan_ctx, head, 13, 0), + ICE_CTX_STORE(ice_rlan_ctx, cpuid, 8, 13), + ICE_CTX_STORE(ice_rlan_ctx, base, 57, 32), + ICE_CTX_STORE(ice_rlan_ctx, qlen, 13, 89), + ICE_CTX_STORE(ice_rlan_ctx, dbuf, 7, 102), + ICE_CTX_STORE(ice_rlan_ctx, hbuf, 5, 109), + ICE_CTX_STORE(ice_rlan_ctx, dtype, 2, 114), + ICE_CTX_STORE(ice_rlan_ctx, dsize, 1, 116), + ICE_CTX_STORE(ice_rlan_ctx, crcstrip, 1, 117), + 
ICE_CTX_STORE(ice_rlan_ctx, l2tsel, 1, 119), + ICE_CTX_STORE(ice_rlan_ctx, hsplit_0, 4, 120), + ICE_CTX_STORE(ice_rlan_ctx, hsplit_1, 2, 124), + ICE_CTX_STORE(ice_rlan_ctx, showiv, 1, 127), + ICE_CTX_STORE(ice_rlan_ctx, rxmax, 14, 174), + ICE_CTX_STORE(ice_rlan_ctx, tphrdesc_ena, 1, 193), + ICE_CTX_STORE(ice_rlan_ctx, tphwdesc_ena, 1, 194), + ICE_CTX_STORE(ice_rlan_ctx, tphdata_ena, 1, 195), + ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena, 1, 196), + ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh, 3, 198), + { 0 } +}; + +/** + * ice_write_rxq_ctx + * @hw: pointer to the hardware structure + * @rlan_ctx: pointer to the rxq context + * @rxq_index: the index of the rx queue + * + * Converts rxq context from sparse to dense structure and then writes + * it to hw register space + */ +enum ice_status +ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, + u32 rxq_index) +{ + u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 }; + + ice_set_ctx((u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info); + return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index); +} + +/* LAN Tx Queue Context */ +const struct ice_ctx_ele ice_tlan_ctx_info[] = { + /* Field Width LSB */ + ICE_CTX_STORE(ice_tlan_ctx, base, 57, 0), + ICE_CTX_STORE(ice_tlan_ctx, port_num, 3, 57), + ICE_CTX_STORE(ice_tlan_ctx, cgd_num, 5, 60), + ICE_CTX_STORE(ice_tlan_ctx, pf_num, 3, 65), + ICE_CTX_STORE(ice_tlan_ctx, vmvf_num, 10, 68), + ICE_CTX_STORE(ice_tlan_ctx, vmvf_type, 2, 78), + ICE_CTX_STORE(ice_tlan_ctx, src_vsi, 10, 80), + ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena, 1, 90), + ICE_CTX_STORE(ice_tlan_ctx, alt_vlan, 1, 92), + ICE_CTX_STORE(ice_tlan_ctx, cpuid, 8, 93), + ICE_CTX_STORE(ice_tlan_ctx, wb_mode, 1, 101), + ICE_CTX_STORE(ice_tlan_ctx, tphrd_desc, 1, 102), + ICE_CTX_STORE(ice_tlan_ctx, tphrd, 1, 103), + ICE_CTX_STORE(ice_tlan_ctx, tphwr_desc, 1, 104), + ICE_CTX_STORE(ice_tlan_ctx, cmpq_id, 9, 105), + ICE_CTX_STORE(ice_tlan_ctx, qnum_in_func, 14, 114), + ICE_CTX_STORE(ice_tlan_ctx, itr_notification_mode, 1, 128), + ICE_CTX_STORE(ice_tlan_ctx, adjust_prof_id, 6, 129), + ICE_CTX_STORE(ice_tlan_ctx, qlen, 13, 135), + ICE_CTX_STORE(ice_tlan_ctx, quanta_prof_idx, 4, 148), + ICE_CTX_STORE(ice_tlan_ctx, tso_ena, 1, 152), + ICE_CTX_STORE(ice_tlan_ctx, tso_qnum, 11, 153), + ICE_CTX_STORE(ice_tlan_ctx, legacy_int, 1, 164), + ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165), + ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166), + ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168), + ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 110, 171), + { 0 } +}; + +/** + * ice_debug_cq + * @hw: pointer to the hardware structure + * @mask: debug mask + * @desc: pointer to control queue descriptor + * @buf: pointer to command buffer + * @buf_len: max length of buf + * + * Dumps debug log about control command with descriptor contents. 
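+ *
+ * Illustrative call from a control queue send/receive path (hypothetical
+ * caller; mask is whatever debug class the caller wants the dump filed
+ * under):
+ *
+ *   ice_debug_cq(hw, mask, (void *)desc, buf, buf_size);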
+ */ +void ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc, + void *buf, u16 buf_len) +{ + struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc; + u16 len; + +#ifndef CONFIG_DYNAMIC_DEBUG + if (!(mask & hw->debug_mask)) + return; +#endif + + if (!desc) + return; + + len = le16_to_cpu(cq_desc->datalen); + + ice_debug(hw, mask, + "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n", + le16_to_cpu(cq_desc->opcode), + le16_to_cpu(cq_desc->flags), + le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval)); + ice_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->cookie_high), + le32_to_cpu(cq_desc->cookie_low)); + ice_debug(hw, mask, "\tparam (0,1) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->params.generic.param0), + le32_to_cpu(cq_desc->params.generic.param1)); + ice_debug(hw, mask, "\taddr (h,l) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->params.generic.addr_high), + le32_to_cpu(cq_desc->params.generic.addr_low)); + if (buf && cq_desc->datalen != 0) { + ice_debug(hw, mask, "Buffer:\n"); + if (buf_len < len) + len = buf_len; + + ice_debug_array(hw, mask, 16, 1, (u8 *)buf, len); + } +} + +/* FW Admin Queue command wrappers */ + +/** + * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue + * @hw: pointer to the hw struct + * @desc: descriptor describing the command + * @buf: buffer to use for indirect commands (NULL for direct commands) + * @buf_size: size of buffer for indirect commands (0 for direct commands) + * @cd: pointer to command details structure + * + * Helper function to send FW Admin Queue commands to the FW Admin Queue. + */ +enum ice_status +ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, + u16 buf_size, struct ice_sq_cd *cd) +{ + return ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd); +} + +/** + * ice_aq_get_fw_ver + * @hw: pointer to the hw struct + * @cd: pointer to command details structure or NULL + * + * Get the firmware version (0x0001) from the admin queue commands + */ +enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd) +{ + struct ice_aqc_get_ver *resp; + struct ice_aq_desc desc; + enum ice_status status; + + resp = &desc.params.get_ver; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_ver); + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + + if (!status) { + hw->fw_branch = resp->fw_branch; + hw->fw_maj_ver = resp->fw_major; + hw->fw_min_ver = resp->fw_minor; + hw->fw_patch = resp->fw_patch; + hw->fw_build = le32_to_cpu(resp->fw_build); + hw->api_branch = resp->api_branch; + hw->api_maj_ver = resp->api_major; + hw->api_min_ver = resp->api_minor; + hw->api_patch = resp->api_patch; + } + + return status; +} + +/** + * ice_aq_q_shutdown + * @hw: pointer to the hw struct + * @unloading: is the driver unloading itself + * + * Tell the Firmware that we're shutting down the AdminQ and whether + * or not the driver is unloading as well (0x0003). 
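+ *
+ * Illustrative call from a driver teardown path (hypothetical caller, not
+ * part of this patch):
+ *
+ *   if (ice_aq_q_shutdown(hw, true))
+ *           ice_debug(hw, ICE_DBG_INIT, "shutdown AQ request failed\n");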
+ */ +enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading) +{ + struct ice_aqc_q_shutdown *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.q_shutdown; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown); + + if (unloading) + cmd->driver_unloading = cpu_to_le32(ICE_AQC_DRIVER_UNLOADING); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + +/** + * ice_aq_req_res + * @hw: pointer to the hw struct + * @res: resource id + * @access: access type + * @sdp_number: resource number + * @timeout: the maximum time in ms that the driver may hold the resource + * @cd: pointer to command details structure or NULL + * + * Requests common resource using the admin queue commands (0x0008). + * When attempting to acquire the Global Config Lock, the driver can + * learn of three states: + * 1) ICE_SUCCESS - acquired lock, and can perform download package + * 2) ICE_ERR_AQ_ERROR - did not get lock, driver should fail to load + * 3) ICE_ERR_AQ_NO_WORK - did not get lock, but another driver has + * successfully downloaded the package; the driver does + * not have to download the package and can continue + * loading + * + * Note that if the caller is in an acquire lock, perform action, release lock + * phase of operation, it is possible that the FW may detect a timeout and issue + * a CORER. In this case, the driver will receive a CORER interrupt and will + * have to determine its cause. The calling thread that is handling this flow + * will likely get an error propagated back to it indicating the Download + * Package, Update Package or the Release Resource AQ commands timed out. + */ +static enum ice_status +ice_aq_req_res(struct ice_hw *hw, enum ice_aq_res_ids res, + enum ice_aq_res_access_type access, u8 sdp_number, u32 *timeout, + struct ice_sq_cd *cd) +{ + struct ice_aqc_req_res *cmd_resp; + struct ice_aq_desc desc; + enum ice_status status; + + cmd_resp = &desc.params.res_owner; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_req_res); + + cmd_resp->res_id = cpu_to_le16(res); + cmd_resp->access_type = cpu_to_le16(access); + cmd_resp->res_number = cpu_to_le32(sdp_number); + cmd_resp->timeout = cpu_to_le32(*timeout); + *timeout = 0; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + + /* The completion specifies the maximum time in ms that the driver + * may hold the resource in the Timeout field. + */ + + /* Global config lock response utilizes an additional status field. + * + * If the Global config lock resource is held by some other driver, the + * command completes with ICE_AQ_RES_GLBL_IN_PROG in the status field + * and the timeout field indicates the maximum time the current owner + * of the resource has to free it. + */ + if (res == ICE_GLOBAL_CFG_LOCK_RES_ID) { + if (le16_to_cpu(cmd_resp->status) == ICE_AQ_RES_GLBL_SUCCESS) { + *timeout = le32_to_cpu(cmd_resp->timeout); + return 0; + } else if (le16_to_cpu(cmd_resp->status) == + ICE_AQ_RES_GLBL_IN_PROG) { + *timeout = le32_to_cpu(cmd_resp->timeout); + return ICE_ERR_AQ_ERROR; + } else if (le16_to_cpu(cmd_resp->status) == + ICE_AQ_RES_GLBL_DONE) { + return ICE_ERR_AQ_NO_WORK; + } + + /* invalid FW response, force a timeout immediately */ + *timeout = 0; + return ICE_ERR_AQ_ERROR; + } + + /* If the resource is held by some other driver, the command completes + * with a busy return value and the timeout field indicates the maximum + * time the current owner of the resource has to free it. 
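+ *
+ * ice_acquire_res() relies on this: when the request completes with
+ * ICE_AQ_RC_EBUSY it keeps the returned timeout and polls until the current
+ * owner frees the resource or that timeout expires.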
+ */ + if (!status || hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) + *timeout = le32_to_cpu(cmd_resp->timeout); + + return status; +} + +/** + * ice_aq_release_res + * @hw: pointer to the hw struct + * @res: resource id + * @sdp_number: resource number + * @cd: pointer to command details structure or NULL + * + * release common resource using the admin queue commands (0x0009) + */ +static enum ice_status +ice_aq_release_res(struct ice_hw *hw, enum ice_aq_res_ids res, u8 sdp_number, + struct ice_sq_cd *cd) +{ + struct ice_aqc_req_res *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.res_owner; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_release_res); + + cmd->res_id = cpu_to_le16(res); + cmd->res_number = cpu_to_le32(sdp_number); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** + * ice_acquire_res + * @hw: pointer to the HW structure + * @res: resource id + * @access: access type (read or write) + * @timeout: timeout in milliseconds + * + * This function will attempt to acquire the ownership of a resource. + */ +enum ice_status +ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res, + enum ice_aq_res_access_type access, u32 timeout) +{ +#define ICE_RES_POLLING_DELAY_MS 10 + u32 delay = ICE_RES_POLLING_DELAY_MS; + u32 time_left = timeout; + enum ice_status status; + + status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL); + + /* A return code of ICE_ERR_AQ_NO_WORK means that another driver has + * previously acquired the resource and performed any necessary updates; + * in this case the caller does not obtain the resource and has no + * further work to do. + */ + if (status == ICE_ERR_AQ_NO_WORK) + goto ice_acquire_res_exit; + + if (status) + ice_debug(hw, ICE_DBG_RES, + "resource %d acquire type %d failed.\n", res, access); + + /* If necessary, poll until the current lock owner times out */ + timeout = time_left; + while (status && timeout && time_left) { + mdelay(delay); + timeout = (timeout > delay) ? timeout - delay : 0; + status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL); + + if (status == ICE_ERR_AQ_NO_WORK) + /* lock free, but no work to do */ + break; + + if (!status) + /* lock acquired */ + break; + } + if (status && status != ICE_ERR_AQ_NO_WORK) + ice_debug(hw, ICE_DBG_RES, "resource acquire timed out.\n"); + +ice_acquire_res_exit: + if (status == ICE_ERR_AQ_NO_WORK) { + if (access == ICE_RES_WRITE) + ice_debug(hw, ICE_DBG_RES, + "resource indicates no work to do.\n"); + else + ice_debug(hw, ICE_DBG_RES, + "Warning: ICE_ERR_AQ_NO_WORK not expected\n"); + } + return status; +} + +/** + * ice_release_res + * @hw: pointer to the HW structure + * @res: resource id + * + * This function will release a resource using the proper Admin Command. 
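+ *
+ * Typical acquire/use/release shape (hypothetical caller; res_id and the
+ * access type come from the ice_aq_res_ids and ice_aq_res_access_type
+ * enums):
+ *
+ *   status = ice_acquire_res(hw, res_id, ICE_RES_READ, timeout_ms);
+ *   if (!status) {
+ *           ... read or update the shared resource ...
+ *           ice_release_res(hw, res_id);
+ *   }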
+ */ +void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res) +{ + enum ice_status status; + u32 total_delay = 0; + + status = ice_aq_release_res(hw, res, 0, NULL); + + /* there are some rare cases when trying to release the resource + * results in an admin Q timeout, so handle them correctly + */ + while ((status == ICE_ERR_AQ_TIMEOUT) && + (total_delay < hw->adminq.sq_cmd_timeout)) { + mdelay(1); + status = ice_aq_release_res(hw, res, 0, NULL); + total_delay++; + } +} + +/** + * ice_parse_caps - parse function/device capabilities + * @hw: pointer to the hw struct + * @buf: pointer to a buffer containing function/device capability records + * @cap_count: number of capability records in the list + * @opc: type of capabilities list to parse + * + * Helper function to parse function(0x000a)/device(0x000b) capabilities list. + */ +static void +ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, + enum ice_adminq_opc opc) +{ + struct ice_aqc_list_caps_elem *cap_resp; + struct ice_hw_func_caps *func_p = NULL; + struct ice_hw_dev_caps *dev_p = NULL; + struct ice_hw_common_caps *caps; + u32 i; + + if (!buf) + return; + + cap_resp = (struct ice_aqc_list_caps_elem *)buf; + + if (opc == ice_aqc_opc_list_dev_caps) { + dev_p = &hw->dev_caps; + caps = &dev_p->common_cap; + } else if (opc == ice_aqc_opc_list_func_caps) { + func_p = &hw->func_caps; + caps = &func_p->common_cap; + } else { + ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n"); + return; + } + + for (i = 0; caps && i < cap_count; i++, cap_resp++) { + u32 logical_id = le32_to_cpu(cap_resp->logical_id); + u32 phys_id = le32_to_cpu(cap_resp->phys_id); + u32 number = le32_to_cpu(cap_resp->number); + u16 cap = le16_to_cpu(cap_resp->cap); + + switch (cap) { + case ICE_AQC_CAPS_VSI: + if (dev_p) { + dev_p->num_vsi_allocd_to_host = number; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Dev.VSI cnt = %d\n", + dev_p->num_vsi_allocd_to_host); + } else if (func_p) { + func_p->guaranteed_num_vsi = number; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Func.VSI cnt = %d\n", + func_p->guaranteed_num_vsi); + } + break; + case ICE_AQC_CAPS_RSS: + caps->rss_table_size = number; + caps->rss_table_entry_width = logical_id; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: RSS table size = %d\n", + caps->rss_table_size); + ice_debug(hw, ICE_DBG_INIT, + "HW caps: RSS table width = %d\n", + caps->rss_table_entry_width); + break; + case ICE_AQC_CAPS_RXQS: + caps->num_rxq = number; + caps->rxq_first_id = phys_id; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Num Rx Qs = %d\n", caps->num_rxq); + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Rx first queue ID = %d\n", + caps->rxq_first_id); + break; + case ICE_AQC_CAPS_TXQS: + caps->num_txq = number; + caps->txq_first_id = phys_id; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Num Tx Qs = %d\n", caps->num_txq); + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Tx first queue ID = %d\n", + caps->txq_first_id); + break; + case ICE_AQC_CAPS_MSIX: + caps->num_msix_vectors = number; + caps->msix_vector_first_id = phys_id; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: MSIX vector count = %d\n", + caps->num_msix_vectors); + ice_debug(hw, ICE_DBG_INIT, + "HW caps: MSIX first vector index = %d\n", + caps->msix_vector_first_id); + break; + case ICE_AQC_CAPS_MAX_MTU: + caps->max_mtu = number; + if (dev_p) + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Dev.MaxMTU = %d\n", + caps->max_mtu); + else if (func_p) + ice_debug(hw, ICE_DBG_INIT, + "HW caps: func.MaxMTU = %d\n", + caps->max_mtu); + break; + default: + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Unknown 
capability[%d]: 0x%x\n", i, + cap); + break; + } + } +} + +/** + * ice_aq_discover_caps - query function/device capabilities + * @hw: pointer to the hw struct + * @buf: a virtual buffer to hold the capabilities + * @buf_size: Size of the virtual buffer + * @data_size: Size of the returned data, or buf size needed if AQ err==ENOMEM + * @opc: capabilities type to discover - pass in the command opcode + * @cd: pointer to command details structure or NULL + * + * Get the function(0x000a)/device(0x000b) capabilities description from + * the firmware. + */ +static enum ice_status +ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u16 *data_size, + enum ice_adminq_opc opc, struct ice_sq_cd *cd) +{ + struct ice_aqc_list_caps *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.get_cap; + + if (opc != ice_aqc_opc_list_func_caps && + opc != ice_aqc_opc_list_dev_caps) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, opc); + + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status) + ice_parse_caps(hw, buf, le32_to_cpu(cmd->count), opc); + *data_size = le16_to_cpu(desc.datalen); + + return status; +} + +/** + * ice_get_caps - get info about the HW + * @hw: pointer to the hardware structure + */ +enum ice_status ice_get_caps(struct ice_hw *hw) +{ + enum ice_status status; + u16 data_size = 0; + u16 cbuf_len; + u8 retries; + + /* The driver doesn't know how many capabilities the device will return + * so the buffer size required isn't known ahead of time. The driver + * starts with cbuf_len and if this turns out to be insufficient, the + * device returns ICE_AQ_RC_ENOMEM and also the buffer size it needs. + * The driver then allocates the buffer of this size and retries the + * operation. So it follows that the retry count is 2. + */ +#define ICE_GET_CAP_BUF_COUNT 40 +#define ICE_GET_CAP_RETRY_COUNT 2 + + cbuf_len = ICE_GET_CAP_BUF_COUNT * + sizeof(struct ice_aqc_list_caps_elem); + + retries = ICE_GET_CAP_RETRY_COUNT; + + do { + void *cbuf; + + cbuf = devm_kzalloc(ice_hw_to_dev(hw), cbuf_len, GFP_KERNEL); + if (!cbuf) + return ICE_ERR_NO_MEMORY; + + status = ice_aq_discover_caps(hw, cbuf, cbuf_len, &data_size, + ice_aqc_opc_list_func_caps, NULL); + devm_kfree(ice_hw_to_dev(hw), cbuf); + + if (!status || hw->adminq.sq_last_status != ICE_AQ_RC_ENOMEM) + break; + + /* If ENOMEM is returned, try again with bigger buffer */ + cbuf_len = data_size; + } while (--retries); + + return status; +} + +/** + * ice_aq_manage_mac_write - manage MAC address write command + * @hw: pointer to the hw struct + * @mac_addr: MAC address to be written as LAA/LAA+WoL/Port address + * @flags: flags to control write behavior + * @cd: pointer to command details structure or NULL + * + * This function is used to write MAC address to the NVM (0x0108). + */ +enum ice_status +ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags, + struct ice_sq_cd *cd) +{ + struct ice_aqc_manage_mac_write *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.mac_write; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_write); + + cmd->flags = flags; + + /* Prep values for flags, sah, sal */ + cmd->sah = htons(*((u16 *)mac_addr)); + cmd->sal = htonl(*((u32 *)(mac_addr + 2))); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** + * ice_aq_clear_pxe_mode + * @hw: pointer to the hw struct + * + * Tell the firmware that the driver is taking over from PXE (0x0110). 
+ */ +static enum ice_status ice_aq_clear_pxe_mode(struct ice_hw *hw) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pxe_mode); + desc.params.clear_pxe.rx_cnt = ICE_AQC_CLEAR_PXE_RX_CNT; + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + +/** + * ice_clear_pxe_mode - clear pxe operations mode + * @hw: pointer to the hw struct + * + * Make sure all PXE mode settings are cleared, including things + * like descriptor fetch/write-back mode. + */ +void ice_clear_pxe_mode(struct ice_hw *hw) +{ + if (ice_check_sq_alive(hw, &hw->adminq)) + ice_aq_clear_pxe_mode(hw); +} + +/** + * ice_aq_set_phy_cfg + * @hw: pointer to the hw struct + * @lport: logical port number + * @cfg: structure with PHY configuration data to be set + * @cd: pointer to command details structure or NULL + * + * Set the various PHY configuration parameters supported on the Port. + * One or more of the Set PHY config parameters may be ignored in an MFP + * mode as the PF may not have the privilege to set some of the PHY Config + * parameters. This status will be indicated by the command response (0x0601). + */ +static enum ice_status +ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport, + struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd) +{ + struct ice_aqc_set_phy_cfg *cmd; + struct ice_aq_desc desc; + + if (!cfg) + return ICE_ERR_PARAM; + + cmd = &desc.params.set_phy; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_cfg); + cmd->lport_num = lport; + + return ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd); +} + +/** + * ice_update_link_info - update status of the HW network link + * @pi: port info structure of the interested logical port + */ +static enum ice_status +ice_update_link_info(struct ice_port_info *pi) +{ + struct ice_aqc_get_phy_caps_data *pcaps; + struct ice_phy_info *phy_info; + enum ice_status status; + struct ice_hw *hw; + + if (!pi) + return ICE_ERR_PARAM; + + hw = pi->hw; + + pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL); + if (!pcaps) + return ICE_ERR_NO_MEMORY; + + phy_info = &pi->phy; + status = ice_aq_get_link_info(pi, true, NULL, NULL); + if (status) + goto out; + + if (phy_info->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, + pcaps, NULL); + if (status) + goto out; + + memcpy(phy_info->link_info.module_type, &pcaps->module_type, + sizeof(phy_info->link_info.module_type)); + } +out: + devm_kfree(ice_hw_to_dev(hw), pcaps); + return status; +} + +/** + * ice_set_fc + * @pi: port information structure + * @aq_failures: pointer to status code, specific to ice_set_fc routine + * @atomic_restart: enable automatic link update + * + * Set the requested flow control mode. 
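+ *
+ * Minimal sketch of the expected call pattern (hypothetical caller, for
+ * example an ethtool pause-parameter handler):
+ *
+ *   pi->fc.req_mode = ICE_FC_FULL;
+ *   status = ice_set_fc(pi, &aq_failures, true);
+ *   if (status)
+ *           ice_debug(pi->hw, ICE_DBG_LINK,
+ *                     "set fc failed, aq_failures %d\n", aq_failures);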
+ */ +enum ice_status +ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart) +{ + struct ice_aqc_set_phy_cfg_data cfg = { 0 }; + struct ice_aqc_get_phy_caps_data *pcaps; + enum ice_status status; + u8 pause_mask = 0x0; + struct ice_hw *hw; + + if (!pi) + return ICE_ERR_PARAM; + hw = pi->hw; + *aq_failures = ICE_SET_FC_AQ_FAIL_NONE; + + switch (pi->fc.req_mode) { + case ICE_FC_FULL: + pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE; + pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE; + break; + case ICE_FC_RX_PAUSE: + pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE; + break; + case ICE_FC_TX_PAUSE: + pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE; + break; + default: + break; + } + + pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL); + if (!pcaps) + return ICE_ERR_NO_MEMORY; + + /* Get the current phy config */ + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + NULL); + if (status) { + *aq_failures = ICE_SET_FC_AQ_FAIL_GET; + goto out; + } + + /* clear the old pause settings */ + cfg.caps = pcaps->caps & ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE | + ICE_AQC_PHY_EN_RX_LINK_PAUSE); + /* set the new capabilities */ + cfg.caps |= pause_mask; + /* If the capabilities have changed, then set the new config */ + if (cfg.caps != pcaps->caps) { + int retry_count, retry_max = 10; + + /* Auto restart link so settings take effect */ + if (atomic_restart) + cfg.caps |= ICE_AQ_PHY_ENA_ATOMIC_LINK; + /* Copy over all the old settings */ + cfg.phy_type_low = pcaps->phy_type_low; + cfg.low_power_ctrl = pcaps->low_power_ctrl; + cfg.eee_cap = pcaps->eee_cap; + cfg.eeer_value = pcaps->eeer_value; + cfg.link_fec_opt = pcaps->link_fec_options; + + status = ice_aq_set_phy_cfg(hw, pi->lport, &cfg, NULL); + if (status) { + *aq_failures = ICE_SET_FC_AQ_FAIL_SET; + goto out; + } + + /* Update the link info + * It sometimes takes a really long time for link to + * come back from the atomic reset. Thus, we wait a + * little bit. + */ + for (retry_count = 0; retry_count < retry_max; retry_count++) { + status = ice_update_link_info(pi); + + if (!status) + break; + + mdelay(100); + } + + if (status) + *aq_failures = ICE_SET_FC_AQ_FAIL_UPDATE; + } + +out: + devm_kfree(ice_hw_to_dev(hw), pcaps); + return status; +} + +/** + * ice_get_link_status - get status of the HW network link + * @pi: port information structure + * @link_up: pointer to bool (true/false = linkup/linkdown) + * + * Variable link_up is true if link is up, false if link is down. + * The variable link_up is invalid if status is non zero. As a + * result of this call, link status reporting becomes enabled + */ +enum ice_status ice_get_link_status(struct ice_port_info *pi, bool *link_up) +{ + struct ice_phy_info *phy_info; + enum ice_status status = 0; + + if (!pi || !link_up) + return ICE_ERR_PARAM; + + phy_info = &pi->phy; + + if (phy_info->get_link_info) { + status = ice_update_link_info(pi); + + if (status) + ice_debug(pi->hw, ICE_DBG_LINK, + "get link status error, status = %d\n", + status); + } + + *link_up = phy_info->link_info.link_info & ICE_AQ_LINK_UP; + + return status; +} + +/** + * ice_aq_set_link_restart_an + * @pi: pointer to the port information structure + * @ena_link: if true: enable link, if false: disable link + * @cd: pointer to command details structure or NULL + * + * Sets up the link and restarts the Auto-Negotiation over the link. 
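+ *
+ * Illustrative call (hypothetical caller, e.g. servicing a request to take
+ * the link down):
+ *
+ *   status = ice_aq_set_link_restart_an(pi, false, NULL);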
+ */ +enum ice_status +ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, + struct ice_sq_cd *cd) +{ + struct ice_aqc_restart_an *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.restart_an; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_restart_an); + + cmd->cmd_flags = ICE_AQC_RESTART_AN_LINK_RESTART; + cmd->lport_num = pi->lport; + if (ena_link) + cmd->cmd_flags |= ICE_AQC_RESTART_AN_LINK_ENABLE; + else + cmd->cmd_flags &= ~ICE_AQC_RESTART_AN_LINK_ENABLE; + + return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd); +} + +/** + * ice_aq_set_event_mask + * @hw: pointer to the hw struct + * @port_num: port number of the physical function + * @mask: event mask to be set + * @cd: pointer to command details structure or NULL + * + * Set event mask (0x0613) + */ +enum ice_status +ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, + struct ice_sq_cd *cd) +{ + struct ice_aqc_set_event_mask *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.set_event_mask; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask); + + cmd->lport_num = port_num; + + cmd->event_mask = cpu_to_le16(mask); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** + * __ice_aq_get_set_rss_lut + * @hw: pointer to the hardware structure + * @vsi_id: VSI FW index + * @lut_type: LUT table type + * @lut: pointer to the LUT buffer provided by the caller + * @lut_size: size of the LUT buffer + * @glob_lut_idx: global LUT index + * @set: set true to set the table, false to get the table + * + * Internal function to get (0x0B05) or set (0x0B03) RSS look up table + */ +static enum ice_status +__ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, + u16 lut_size, u8 glob_lut_idx, bool set) +{ + struct ice_aqc_get_set_rss_lut *cmd_resp; + struct ice_aq_desc desc; + enum ice_status status; + u16 flags = 0; + + cmd_resp = &desc.params.get_set_rss_lut; + + if (set) { + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_lut); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + } else { + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_lut); + } + + cmd_resp->vsi_id = cpu_to_le16(((vsi_id << + ICE_AQC_GSET_RSS_LUT_VSI_ID_S) & + ICE_AQC_GSET_RSS_LUT_VSI_ID_M) | + ICE_AQC_GSET_RSS_LUT_VSI_VALID); + + switch (lut_type) { + case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI: + case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF: + case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL: + flags |= ((lut_type << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S) & + ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M); + break; + default: + status = ICE_ERR_PARAM; + goto ice_aq_get_set_rss_lut_exit; + } + + if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL) { + flags |= ((glob_lut_idx << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S) & + ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M); + + if (!set) + goto ice_aq_get_set_rss_lut_send; + } else if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) { + if (!set) + goto ice_aq_get_set_rss_lut_send; + } else { + goto ice_aq_get_set_rss_lut_send; + } + + /* LUT size is only valid for Global and PF table types */ + switch (lut_size) { + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128: + break; + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512: + flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG << + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; + break; + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K: + if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) { + flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG << + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; + 
break; + } + /* fall-through */ + default: + status = ICE_ERR_PARAM; + goto ice_aq_get_set_rss_lut_exit; + } + +ice_aq_get_set_rss_lut_send: + cmd_resp->flags = cpu_to_le16(flags); + status = ice_aq_send_cmd(hw, &desc, lut, lut_size, NULL); + +ice_aq_get_set_rss_lut_exit: + return status; +} + +/** + * ice_aq_get_rss_lut + * @hw: pointer to the hardware structure + * @vsi_id: VSI FW index + * @lut_type: LUT table type + * @lut: pointer to the LUT buffer provided by the caller + * @lut_size: size of the LUT buffer + * + * get the RSS lookup table, PF or VSI type + */ +enum ice_status +ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, + u16 lut_size) +{ + return __ice_aq_get_set_rss_lut(hw, vsi_id, lut_type, lut, lut_size, 0, + false); +} + +/** + * ice_aq_set_rss_lut + * @hw: pointer to the hardware structure + * @vsi_id: VSI FW index + * @lut_type: LUT table type + * @lut: pointer to the LUT buffer provided by the caller + * @lut_size: size of the LUT buffer + * + * set the RSS lookup table, PF or VSI type + */ +enum ice_status +ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, + u16 lut_size) +{ + return __ice_aq_get_set_rss_lut(hw, vsi_id, lut_type, lut, lut_size, 0, + true); +} + +/** + * __ice_aq_get_set_rss_key + * @hw: pointer to the hw struct + * @vsi_id: VSI FW index + * @key: pointer to key info struct + * @set: set true to set the key, false to get the key + * + * get (0x0B04) or set (0x0B02) the RSS key per VSI + */ +static enum +ice_status __ice_aq_get_set_rss_key(struct ice_hw *hw, u16 vsi_id, + struct ice_aqc_get_set_rss_keys *key, + bool set) +{ + struct ice_aqc_get_set_rss_key *cmd_resp; + u16 key_size = sizeof(*key); + struct ice_aq_desc desc; + + cmd_resp = &desc.params.get_set_rss_key; + + if (set) { + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_key); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + } else { + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_key); + } + + cmd_resp->vsi_id = cpu_to_le16(((vsi_id << + ICE_AQC_GSET_RSS_KEY_VSI_ID_S) & + ICE_AQC_GSET_RSS_KEY_VSI_ID_M) | + ICE_AQC_GSET_RSS_KEY_VSI_VALID); + + return ice_aq_send_cmd(hw, &desc, key, key_size, NULL); +} + +/** + * ice_aq_get_rss_key + * @hw: pointer to the hw struct + * @vsi_id: VSI FW index + * @key: pointer to key info struct + * + * get the RSS key per VSI + */ +enum ice_status +ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_id, + struct ice_aqc_get_set_rss_keys *key) +{ + return __ice_aq_get_set_rss_key(hw, vsi_id, key, false); +} + +/** + * ice_aq_set_rss_key + * @hw: pointer to the hw struct + * @vsi_id: VSI FW index + * @keys: pointer to key info struct + * + * set the RSS key per VSI + */ +enum ice_status +ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_id, + struct ice_aqc_get_set_rss_keys *keys) +{ + return __ice_aq_get_set_rss_key(hw, vsi_id, keys, true); +} + +/** + * ice_aq_add_lan_txq + * @hw: pointer to the hardware structure + * @num_qgrps: Number of added queue groups + * @qg_list: list of queue groups to be added + * @buf_size: size of buffer for indirect command + * @cd: pointer to command details structure or NULL + * + * Add Tx LAN queue (0x0C30) + * + * NOTE: + * Prior to calling add Tx LAN queue: + * Initialize the following as part of the Tx queue context: + * Completion queue ID if the queue uses Completion queue, Quanta profile, + * Cache profile and Packet shaper profile. 
+ * + * After add Tx LAN queue AQ command is completed: + * Interrupts should be associated with specific queues, + * Association of Tx queue to Doorbell queue is not part of Add LAN Tx queue + * flow. + */ +static enum ice_status +ice_aq_add_lan_txq(struct ice_hw *hw, u8 num_qgrps, + struct ice_aqc_add_tx_qgrp *qg_list, u16 buf_size, + struct ice_sq_cd *cd) +{ + u16 i, sum_header_size, sum_q_size = 0; + struct ice_aqc_add_tx_qgrp *list; + struct ice_aqc_add_txqs *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.add_txqs; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_txqs); + + if (!qg_list) + return ICE_ERR_PARAM; + + if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS) + return ICE_ERR_PARAM; + + sum_header_size = num_qgrps * + (sizeof(*qg_list) - sizeof(*qg_list->txqs)); + + list = qg_list; + for (i = 0; i < num_qgrps; i++) { + struct ice_aqc_add_txqs_perq *q = list->txqs; + + sum_q_size += list->num_txqs * sizeof(*q); + list = (struct ice_aqc_add_tx_qgrp *)(q + list->num_txqs); + } + + if (buf_size != (sum_header_size + sum_q_size)) + return ICE_ERR_PARAM; + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + cmd->num_qgrps = num_qgrps; + + return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd); +} + +/** + * ice_aq_dis_lan_txq + * @hw: pointer to the hardware structure + * @num_qgrps: number of groups in the list + * @qg_list: the list of groups to disable + * @buf_size: the total size of the qg_list buffer in bytes + * @cd: pointer to command details structure or NULL + * + * Disable LAN Tx queue (0x0C31) + */ +static enum ice_status +ice_aq_dis_lan_txq(struct ice_hw *hw, u8 num_qgrps, + struct ice_aqc_dis_txq_item *qg_list, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_aqc_dis_txqs *cmd; + struct ice_aq_desc desc; + u16 i, sz = 0; + + cmd = &desc.params.dis_txqs; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_dis_txqs); + + if (!qg_list) + return ICE_ERR_PARAM; + + if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS) + return ICE_ERR_PARAM; + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + cmd->num_entries = num_qgrps; + + for (i = 0; i < num_qgrps; ++i) { + /* Calculate the size taken up by the queue IDs in this group */ + sz += qg_list[i].num_qs * sizeof(qg_list[i].q_id); + + /* Add the size of the group header */ + sz += sizeof(qg_list[i]) - sizeof(qg_list[i].q_id); + + /* If the num of queues is even, add 2 bytes of padding */ + if ((qg_list[i].num_qs % 2) == 0) + sz += 2; + } + + if (buf_size != sz) + return ICE_ERR_PARAM; + + return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd); +} + +/* End of FW Admin Queue command wrappers */ + +/** + * ice_write_byte - write a byte to a packed context structure + * @src_ctx: the context structure to read from + * @dest_ctx: the context to be written to + * @ce_info: a description of the struct to be filled + */ +static void ice_write_byte(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) +{ + u8 src_byte, dest_byte, mask; + u8 *from, *dest; + u16 shift_width; + + /* copy from the next struct field */ + from = src_ctx + ce_info->offset; + + /* prepare the bits and mask */ + shift_width = ce_info->lsb % 8; + mask = (u8)(BIT(ce_info->width) - 1); + + src_byte = *from; + src_byte &= mask; + + /* shift to correct alignment */ + mask <<= shift_width; + src_byte <<= shift_width; + + /* get the current bits from the target bit string */ + dest = dest_ctx + (ce_info->lsb / 8); + + memcpy(&dest_byte, dest, sizeof(dest_byte)); + + dest_byte &= ~mask; /* get the bits not changing */ + dest_byte |= src_byte; /* add in the new bits */ + + 
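+ /* Worked example with illustrative values (not taken from the context
+  * tables above): a field with width = 3 and lsb = 13 gives shift_width = 5
+  * and an initial mask of 0x7; after the shifts the field occupies bits 7:5
+  * of byte 1 (lsb / 8) of the packed context, and at this point only those
+  * bits of dest_byte have been replaced.
+  */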
/* put it all back */ + memcpy(dest, &dest_byte, sizeof(dest_byte)); +} + +/** + * ice_write_word - write a word to a packed context structure + * @src_ctx: the context structure to read from + * @dest_ctx: the context to be written to + * @ce_info: a description of the struct to be filled + */ +static void ice_write_word(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) +{ + u16 src_word, mask; + __le16 dest_word; + u8 *from, *dest; + u16 shift_width; + + /* copy from the next struct field */ + from = src_ctx + ce_info->offset; + + /* prepare the bits and mask */ + shift_width = ce_info->lsb % 8; + mask = BIT(ce_info->width) - 1; + + /* don't swizzle the bits until after the mask because the mask bits + * will be in a different bit position on big endian machines + */ + src_word = *(u16 *)from; + src_word &= mask; + + /* shift to correct alignment */ + mask <<= shift_width; + src_word <<= shift_width; + + /* get the current bits from the target bit string */ + dest = dest_ctx + (ce_info->lsb / 8); + + memcpy(&dest_word, dest, sizeof(dest_word)); + + dest_word &= ~(cpu_to_le16(mask)); /* get the bits not changing */ + dest_word |= cpu_to_le16(src_word); /* add in the new bits */ + + /* put it all back */ + memcpy(dest, &dest_word, sizeof(dest_word)); +} + +/** + * ice_write_dword - write a dword to a packed context structure + * @src_ctx: the context structure to read from + * @dest_ctx: the context to be written to + * @ce_info: a description of the struct to be filled + */ +static void ice_write_dword(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) +{ + u32 src_dword, mask; + __le32 dest_dword; + u8 *from, *dest; + u16 shift_width; + + /* copy from the next struct field */ + from = src_ctx + ce_info->offset; + + /* prepare the bits and mask */ + shift_width = ce_info->lsb % 8; + + /* if the field width is exactly 32 on an x86 machine, then the shift + * operation will not work because the SHL instructions count is masked + * to 5 bits so the shift will do nothing + */ + if (ce_info->width < 32) + mask = BIT(ce_info->width) - 1; + else + mask = (u32)~0; + + /* don't swizzle the bits until after the mask because the mask bits + * will be in a different bit position on big endian machines + */ + src_dword = *(u32 *)from; + src_dword &= mask; + + /* shift to correct alignment */ + mask <<= shift_width; + src_dword <<= shift_width; + + /* get the current bits from the target bit string */ + dest = dest_ctx + (ce_info->lsb / 8); + + memcpy(&dest_dword, dest, sizeof(dest_dword)); + + dest_dword &= ~(cpu_to_le32(mask)); /* get the bits not changing */ + dest_dword |= cpu_to_le32(src_dword); /* add in the new bits */ + + /* put it all back */ + memcpy(dest, &dest_dword, sizeof(dest_dword)); +} + +/** + * ice_write_qword - write a qword to a packed context structure + * @src_ctx: the context structure to read from + * @dest_ctx: the context to be written to + * @ce_info: a description of the struct to be filled + */ +static void ice_write_qword(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) +{ + u64 src_qword, mask; + __le64 dest_qword; + u8 *from, *dest; + u16 shift_width; + + /* copy from the next struct field */ + from = src_ctx + ce_info->offset; + + /* prepare the bits and mask */ + shift_width = ce_info->lsb % 8; + + /* if the field width is exactly 64 on an x86 machine, then the shift + * operation will not work because the SHL instructions count is masked + * to 6 bits so the shift will do nothing + */ + if (ce_info->width < 64) + mask = 
BIT_ULL(ce_info->width) - 1; + else + mask = (u64)~0; + + /* don't swizzle the bits until after the mask because the mask bits + * will be in a different bit position on big endian machines + */ + src_qword = *(u64 *)from; + src_qword &= mask; + + /* shift to correct alignment */ + mask <<= shift_width; + src_qword <<= shift_width; + + /* get the current bits from the target bit string */ + dest = dest_ctx + (ce_info->lsb / 8); + + memcpy(&dest_qword, dest, sizeof(dest_qword)); + + dest_qword &= ~(cpu_to_le64(mask)); /* get the bits not changing */ + dest_qword |= cpu_to_le64(src_qword); /* add in the new bits */ + + /* put it all back */ + memcpy(dest, &dest_qword, sizeof(dest_qword)); +} + +/** + * ice_set_ctx - set context bits in packed structure + * @src_ctx: pointer to a generic non-packed context structure + * @dest_ctx: pointer to memory for the packed structure + * @ce_info: a description of the structure to be transformed + */ +enum ice_status +ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) +{ + int f; + + for (f = 0; ce_info[f].width; f++) { + /* We have to deal with each element of the FW response + * using the correct size so that we are correct regardless + * of the endianness of the machine. + */ + switch (ce_info[f].size_of) { + case sizeof(u8): + ice_write_byte(src_ctx, dest_ctx, &ce_info[f]); + break; + case sizeof(u16): + ice_write_word(src_ctx, dest_ctx, &ce_info[f]); + break; + case sizeof(u32): + ice_write_dword(src_ctx, dest_ctx, &ce_info[f]); + break; + case sizeof(u64): + ice_write_qword(src_ctx, dest_ctx, &ce_info[f]); + break; + default: + return ICE_ERR_INVAL_SIZE; + } + } + + return 0; +} + +/** + * ice_ena_vsi_txq + * @pi: port information structure + * @vsi_id: VSI id + * @tc: tc number + * @num_qgrps: Number of added queue groups + * @buf: list of queue groups to be added + * @buf_size: size of buffer for indirect command + * @cd: pointer to command details structure or NULL + * + * This function adds one lan q + */ +enum ice_status +ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps, + struct ice_aqc_add_tx_qgrp *buf, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_aqc_txsched_elem_data node = { 0 }; + struct ice_sched_node *parent; + enum ice_status status; + struct ice_hw *hw; + + if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) + return ICE_ERR_CFG; + + if (num_qgrps > 1 || buf->num_txqs > 1) + return ICE_ERR_MAX_LIMIT; + + hw = pi->hw; + + mutex_lock(&pi->sched_lock); + + /* find a parent node */ + parent = ice_sched_get_free_qparent(pi, vsi_id, tc, + ICE_SCHED_NODE_OWNER_LAN); + if (!parent) { + status = ICE_ERR_PARAM; + goto ena_txq_exit; + } + buf->parent_teid = parent->info.node_teid; + node.parent_teid = parent->info.node_teid; + /* Mark that the values in the "generic" section as valid. The default + * value in the "generic" section is zero. This means that : + * - Scheduling mode is Bytes Per Second (BPS), indicated by Bit 0. + * - 0 priority among siblings, indicated by Bit 1-3. + * - WFQ, indicated by Bit 4. + * - 0 Adjustment value is used in PSM credit update flow, indicated by + * Bit 5-6. + * - Bit 7 is reserved. + * Without setting the generic section as valid in valid_sections, the + * Admin Q command will fail with error code ICE_AQ_RC_EINVAL. 
+ */ + buf->txqs[0].info.valid_sections = ICE_AQC_ELEM_VALID_GENERIC; + + /* add the lan q */ + status = ice_aq_add_lan_txq(hw, num_qgrps, buf, buf_size, cd); + if (status) + goto ena_txq_exit; + + node.node_teid = buf->txqs[0].q_teid; + node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF; + + /* add a leaf node into schduler tree q layer */ + status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node); + +ena_txq_exit: + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_dis_vsi_txq + * @pi: port information structure + * @num_queues: number of queues + * @q_ids: pointer to the q_id array + * @q_teids: pointer to queue node teids + * @cd: pointer to command details structure or NULL + * + * This function removes queues and their corresponding nodes in SW DB + */ +enum ice_status +ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids, + u32 *q_teids, struct ice_sq_cd *cd) +{ + enum ice_status status = ICE_ERR_DOES_NOT_EXIST; + struct ice_aqc_dis_txq_item qg_list; + u16 i; + + if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) + return ICE_ERR_CFG; + + mutex_lock(&pi->sched_lock); + + for (i = 0; i < num_queues; i++) { + struct ice_sched_node *node; + + node = ice_sched_find_node_by_teid(pi->root, q_teids[i]); + if (!node) + continue; + qg_list.parent_teid = node->info.parent_teid; + qg_list.num_qs = 1; + qg_list.q_id[0] = cpu_to_le16(q_ids[i]); + status = ice_aq_dis_lan_txq(pi->hw, 1, &qg_list, + sizeof(qg_list), cd); + + if (status) + break; + ice_free_sched_node(pi, node); + } + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_cfg_vsi_qs - configure the new/exisiting VSI queues + * @pi: port information structure + * @vsi_id: VSI Id + * @tc_bitmap: TC bitmap + * @maxqs: max queues array per TC + * @owner: lan or rdma + * + * This function adds/updates the VSI queues per TC. + */ +static enum ice_status +ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap, + u16 *maxqs, u8 owner) +{ + enum ice_status status = 0; + u8 i; + + if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) + return ICE_ERR_CFG; + + mutex_lock(&pi->sched_lock); + + for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { + /* configuration is possible only if TC node is present */ + if (!ice_sched_get_tc_node(pi, i)) + continue; + + status = ice_sched_cfg_vsi(pi, vsi_id, i, maxqs[i], owner, + ice_is_tc_ena(tc_bitmap, i)); + if (status) + break; + } + + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_cfg_vsi_lan - configure VSI lan queues + * @pi: port information structure + * @vsi_id: VSI Id + * @tc_bitmap: TC bitmap + * @max_lanqs: max lan queues array per TC + * + * This function adds/updates the VSI lan queues per TC. + */ +enum ice_status +ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap, + u16 *max_lanqs) +{ + return ice_cfg_vsi_qs(pi, vsi_id, tc_bitmap, max_lanqs, + ICE_SCHED_NODE_OWNER_LAN); +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h new file mode 100644 index 000000000..6455b6952 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. 
*/ + +#ifndef _ICE_COMMON_H_ +#define _ICE_COMMON_H_ + +#include "ice.h" +#include "ice_type.h" +#include "ice_switch.h" + +void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, + u16 buf_len); +enum ice_status ice_init_hw(struct ice_hw *hw); +void ice_deinit_hw(struct ice_hw *hw); +enum ice_status ice_check_reset(struct ice_hw *hw); +enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req); +enum ice_status ice_init_all_ctrlq(struct ice_hw *hw); +void ice_shutdown_all_ctrlq(struct ice_hw *hw); +enum ice_status +ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_rq_event_info *e, u16 *pending); +enum ice_status +ice_get_link_status(struct ice_port_info *pi, bool *link_up); +enum ice_status +ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res, + enum ice_aq_res_access_type access, u32 timeout); +void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res); +enum ice_status ice_init_nvm(struct ice_hw *hw); +enum ice_status +ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_aq_desc *desc, void *buf, u16 buf_size, + struct ice_sq_cd *cd); +void ice_clear_pxe_mode(struct ice_hw *hw); +enum ice_status ice_get_caps(struct ice_hw *hw); +enum ice_status +ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, + u32 rxq_index); + +enum ice_status +ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, + u16 lut_size); +enum ice_status +ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, + u16 lut_size); +enum ice_status +ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_id, + struct ice_aqc_get_set_rss_keys *keys); +enum ice_status +ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_id, + struct ice_aqc_get_set_rss_keys *keys); +bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq); +enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading); +void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); +extern const struct ice_ctx_ele ice_tlan_ctx_info[]; +enum ice_status +ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info); +enum ice_status +ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, + void *buf, u16 buf_size, struct ice_sq_cd *cd); +enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd); +enum ice_status +ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags, + struct ice_sq_cd *cd); +enum ice_status ice_clear_pf_cfg(struct ice_hw *hw); +enum ice_status +ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart); +enum ice_status +ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, + struct ice_sq_cd *cd); +enum ice_status +ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, + struct ice_link_status *link, struct ice_sq_cd *cd); +enum ice_status +ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, + struct ice_sq_cd *cd); +enum ice_status +ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids, + u32 *q_teids, struct ice_sq_cd *cmd_details); +enum ice_status +ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap, + u16 *max_lanqs); +enum ice_status +ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps, + struct ice_aqc_add_tx_qgrp *buf, u16 buf_size, + struct ice_sq_cd *cd); +#endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c new file mode 100644 index 000000000..921cc0c9a --- 
/dev/null +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -0,0 +1,1088 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_common.h" + +/** + * ice_adminq_init_regs - Initialize AdminQ registers + * @hw: pointer to the hardware structure + * + * This assumes the alloc_sq and alloc_rq functions have already been called + */ +static void ice_adminq_init_regs(struct ice_hw *hw) +{ + struct ice_ctl_q_info *cq = &hw->adminq; + + cq->sq.head = PF_FW_ATQH; + cq->sq.tail = PF_FW_ATQT; + cq->sq.len = PF_FW_ATQLEN; + cq->sq.bah = PF_FW_ATQBAH; + cq->sq.bal = PF_FW_ATQBAL; + cq->sq.len_mask = PF_FW_ATQLEN_ATQLEN_M; + cq->sq.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M; + cq->sq.head_mask = PF_FW_ATQH_ATQH_M; + + cq->rq.head = PF_FW_ARQH; + cq->rq.tail = PF_FW_ARQT; + cq->rq.len = PF_FW_ARQLEN; + cq->rq.bah = PF_FW_ARQBAH; + cq->rq.bal = PF_FW_ARQBAL; + cq->rq.len_mask = PF_FW_ARQLEN_ARQLEN_M; + cq->rq.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M; + cq->rq.head_mask = PF_FW_ARQH_ARQH_M; +} + +/** + * ice_check_sq_alive + * @hw: pointer to the hw struct + * @cq: pointer to the specific Control queue + * + * Returns true if Queue is enabled else false. + */ +bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + /* check both queue-length and queue-enable fields */ + if (cq->sq.len && cq->sq.len_mask && cq->sq.len_ena_mask) + return (rd32(hw, cq->sq.len) & (cq->sq.len_mask | + cq->sq.len_ena_mask)) == + (cq->num_sq_entries | cq->sq.len_ena_mask); + + return false; +} + +/** + * ice_alloc_ctrlq_sq_ring - Allocate Control Transmit Queue (ATQ) rings + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static enum ice_status +ice_alloc_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + size_t size = cq->num_sq_entries * sizeof(struct ice_aq_desc); + + cq->sq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size, + &cq->sq.desc_buf.pa, + GFP_KERNEL | __GFP_ZERO); + if (!cq->sq.desc_buf.va) + return ICE_ERR_NO_MEMORY; + cq->sq.desc_buf.size = size; + + cq->sq.cmd_buf = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries, + sizeof(struct ice_sq_cd), GFP_KERNEL); + if (!cq->sq.cmd_buf) { + dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size, + cq->sq.desc_buf.va, cq->sq.desc_buf.pa); + cq->sq.desc_buf.va = NULL; + cq->sq.desc_buf.pa = 0; + cq->sq.desc_buf.size = 0; + return ICE_ERR_NO_MEMORY; + } + + return 0; +} + +/** + * ice_alloc_ctrlq_rq_ring - Allocate Control Receive Queue (ARQ) rings + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static enum ice_status +ice_alloc_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + size_t size = cq->num_rq_entries * sizeof(struct ice_aq_desc); + + cq->rq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size, + &cq->rq.desc_buf.pa, + GFP_KERNEL | __GFP_ZERO); + if (!cq->rq.desc_buf.va) + return ICE_ERR_NO_MEMORY; + cq->rq.desc_buf.size = size; + return 0; +} + +/** + * ice_free_ctrlq_sq_ring - Free Control Transmit Queue (ATQ) rings + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * This assumes the posted send buffers have already been cleaned + * and de-allocated + */ +static void ice_free_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size, + cq->sq.desc_buf.va, cq->sq.desc_buf.pa); + cq->sq.desc_buf.va = NULL; + cq->sq.desc_buf.pa = 0; + 
cq->sq.desc_buf.size = 0; +} + +/** + * ice_free_ctrlq_rq_ring - Free Control Receive Queue (ARQ) rings + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * This assumes the posted receive buffers have already been cleaned + * and de-allocated + */ +static void ice_free_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.desc_buf.size, + cq->rq.desc_buf.va, cq->rq.desc_buf.pa); + cq->rq.desc_buf.va = NULL; + cq->rq.desc_buf.pa = 0; + cq->rq.desc_buf.size = 0; +} + +/** + * ice_alloc_rq_bufs - Allocate pre-posted buffers for the ARQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static enum ice_status +ice_alloc_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + int i; + + /* We'll be allocating the buffer info memory first, then we can + * allocate the mapped buffers for the event processing + */ + cq->rq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_rq_entries, + sizeof(cq->rq.desc_buf), GFP_KERNEL); + if (!cq->rq.dma_head) + return ICE_ERR_NO_MEMORY; + cq->rq.r.rq_bi = (struct ice_dma_mem *)cq->rq.dma_head; + + /* allocate the mapped buffers */ + for (i = 0; i < cq->num_rq_entries; i++) { + struct ice_aq_desc *desc; + struct ice_dma_mem *bi; + + bi = &cq->rq.r.rq_bi[i]; + bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw), + cq->rq_buf_size, &bi->pa, + GFP_KERNEL | __GFP_ZERO); + if (!bi->va) + goto unwind_alloc_rq_bufs; + bi->size = cq->rq_buf_size; + + /* now configure the descriptors for use */ + desc = ICE_CTL_Q_DESC(cq->rq, i); + + desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF); + if (cq->rq_buf_size > ICE_AQ_LG_BUF) + desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB); + desc->opcode = 0; + /* This is in accordance with Admin queue design, there is no + * register for buffer size configuration + */ + desc->datalen = cpu_to_le16(bi->size); + desc->retval = 0; + desc->cookie_high = 0; + desc->cookie_low = 0; + desc->params.generic.addr_high = + cpu_to_le32(upper_32_bits(bi->pa)); + desc->params.generic.addr_low = + cpu_to_le32(lower_32_bits(bi->pa)); + desc->params.generic.param0 = 0; + desc->params.generic.param1 = 0; + } + return 0; + +unwind_alloc_rq_bufs: + /* don't try to free the one that failed... 
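The unwind path that follows is a common allocation pattern: on a partial failure, free only the entries that were actually allocated, walking backwards, then release the tracking array. A minimal standalone sketch of the same idiom using plain malloc/free rather than the driver's devm/dmam helpers:

#include <stdlib.h>

/* Allocate 'count' buffers of 'size' bytes; on failure, free what was
 * already allocated and return NULL, mirroring the unwind_alloc_rq_bufs
 * style cleanup below.
 */
static void **alloc_all_or_nothing(size_t count, size_t size)
{
	void **bufs = calloc(count, sizeof(*bufs));
	size_t i;

	if (!bufs)
		return NULL;

	for (i = 0; i < count; i++) {
		bufs[i] = malloc(size);
		if (!bufs[i])
			goto unwind;
	}
	return bufs;

unwind:
	/* don't try to free the one that failed: it was never allocated */
	while (i--)
		free(bufs[i]);
	free(bufs);
	return NULL;
}

int main(void)
{
	void **bufs = alloc_all_or_nothing(8, 512);

	if (bufs) {
		for (size_t i = 0; i < 8; i++)
			free(bufs[i]);
		free(bufs);
	}
	return 0;
}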
*/ + i--; + for (; i >= 0; i--) { + dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size, + cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa); + cq->rq.r.rq_bi[i].va = NULL; + cq->rq.r.rq_bi[i].pa = 0; + cq->rq.r.rq_bi[i].size = 0; + } + devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head); + + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_alloc_sq_bufs - Allocate empty buffer structs for the ATQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static enum ice_status +ice_alloc_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + int i; + + /* No mapped memory needed yet, just the buffer info structures */ + cq->sq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries, + sizeof(cq->sq.desc_buf), GFP_KERNEL); + if (!cq->sq.dma_head) + return ICE_ERR_NO_MEMORY; + cq->sq.r.sq_bi = (struct ice_dma_mem *)cq->sq.dma_head; + + /* allocate the mapped buffers */ + for (i = 0; i < cq->num_sq_entries; i++) { + struct ice_dma_mem *bi; + + bi = &cq->sq.r.sq_bi[i]; + bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw), + cq->sq_buf_size, &bi->pa, + GFP_KERNEL | __GFP_ZERO); + if (!bi->va) + goto unwind_alloc_sq_bufs; + bi->size = cq->sq_buf_size; + } + return 0; + +unwind_alloc_sq_bufs: + /* don't try to free the one that failed... */ + i--; + for (; i >= 0; i--) { + dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.r.sq_bi[i].size, + cq->sq.r.sq_bi[i].va, cq->sq.r.sq_bi[i].pa); + cq->sq.r.sq_bi[i].va = NULL; + cq->sq.r.sq_bi[i].pa = 0; + cq->sq.r.sq_bi[i].size = 0; + } + devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head); + + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_free_rq_bufs - Free ARQ buffer info elements + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static void ice_free_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + int i; + + /* free descriptors */ + for (i = 0; i < cq->num_rq_entries; i++) { + dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size, + cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa); + cq->rq.r.rq_bi[i].va = NULL; + cq->rq.r.rq_bi[i].pa = 0; + cq->rq.r.rq_bi[i].size = 0; + } + + /* free the dma header */ + devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head); +} + +/** + * ice_free_sq_bufs - Free ATQ buffer info elements + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static void ice_free_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + int i; + + /* only unmap if the address is non-NULL */ + for (i = 0; i < cq->num_sq_entries; i++) + if (cq->sq.r.sq_bi[i].pa) { + dmam_free_coherent(ice_hw_to_dev(hw), + cq->sq.r.sq_bi[i].size, + cq->sq.r.sq_bi[i].va, + cq->sq.r.sq_bi[i].pa); + cq->sq.r.sq_bi[i].va = NULL; + cq->sq.r.sq_bi[i].pa = 0; + cq->sq.r.sq_bi[i].size = 0; + } + + /* free the buffer info list */ + devm_kfree(ice_hw_to_dev(hw), cq->sq.cmd_buf); + + /* free the dma header */ + devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head); +} + +/** + * ice_cfg_sq_regs - configure Control ATQ registers + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * Configure base address and length registers for the transmit queue + */ +static enum ice_status +ice_cfg_sq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + u32 reg = 0; + + /* Clear Head and Tail */ + wr32(hw, cq->sq.head, 0); + wr32(hw, cq->sq.tail, 0); + + /* set starting point */ + wr32(hw, cq->sq.len, (cq->num_sq_entries | cq->sq.len_ena_mask)); + wr32(hw, cq->sq.bal, lower_32_bits(cq->sq.desc_buf.pa)); + wr32(hw, cq->sq.bah, 
upper_32_bits(cq->sq.desc_buf.pa)); + + /* Check one register to verify that config was applied */ + reg = rd32(hw, cq->sq.bal); + if (reg != lower_32_bits(cq->sq.desc_buf.pa)) + return ICE_ERR_AQ_ERROR; + + return 0; +} + +/** + * ice_cfg_rq_regs - configure Control ARQ register + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * Configure base address and length registers for the receive (event q) + */ +static enum ice_status +ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + u32 reg = 0; + + /* Clear Head and Tail */ + wr32(hw, cq->rq.head, 0); + wr32(hw, cq->rq.tail, 0); + + /* set starting point */ + wr32(hw, cq->rq.len, (cq->num_rq_entries | cq->rq.len_ena_mask)); + wr32(hw, cq->rq.bal, lower_32_bits(cq->rq.desc_buf.pa)); + wr32(hw, cq->rq.bah, upper_32_bits(cq->rq.desc_buf.pa)); + + /* Update tail in the HW to post pre-allocated buffers */ + wr32(hw, cq->rq.tail, (u32)(cq->num_rq_entries - 1)); + + /* Check one register to verify that config was applied */ + reg = rd32(hw, cq->rq.bal); + if (reg != lower_32_bits(cq->rq.desc_buf.pa)) + return ICE_ERR_AQ_ERROR; + + return 0; +} + +/** + * ice_init_sq - main initialization routine for Control ATQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * This is the main initialization routine for the Control Send Queue + * Prior to calling this function, drivers *MUST* set the following fields + * in the cq->structure: + * - cq->num_sq_entries + * - cq->sq_buf_size + * + * Do *NOT* hold the lock when calling this as the memory allocation routines + * called are not going to be atomic context safe + */ +static enum ice_status ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + enum ice_status ret_code; + + if (cq->sq.count > 0) { + /* queue already initialized */ + ret_code = ICE_ERR_NOT_READY; + goto init_ctrlq_exit; + } + + /* verify input for valid configuration */ + if (!cq->num_sq_entries || !cq->sq_buf_size) { + ret_code = ICE_ERR_CFG; + goto init_ctrlq_exit; + } + + cq->sq.next_to_use = 0; + cq->sq.next_to_clean = 0; + + /* allocate the ring memory */ + ret_code = ice_alloc_ctrlq_sq_ring(hw, cq); + if (ret_code) + goto init_ctrlq_exit; + + /* allocate buffers in the rings */ + ret_code = ice_alloc_sq_bufs(hw, cq); + if (ret_code) + goto init_ctrlq_free_rings; + + /* initialize base registers */ + ret_code = ice_cfg_sq_regs(hw, cq); + if (ret_code) + goto init_ctrlq_free_rings; + + /* success! */ + cq->sq.count = cq->num_sq_entries; + goto init_ctrlq_exit; + +init_ctrlq_free_rings: + ice_free_ctrlq_sq_ring(hw, cq); + +init_ctrlq_exit: + return ret_code; +} + +/** + * ice_init_rq - initialize ARQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * The main initialization routine for the Admin Receive (Event) Queue. 
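As the ice_init_sq kernel-doc above notes (and ice_init_rq below repeats for the receive side), the queue depths and buffer sizes must be chosen by the caller before initialization. A hedged sketch of what that caller-side setup might look like in kernel context; the entry counts used here are illustrative values, not values taken from this driver, and the helper name is hypothetical.

/* Illustration only: fill in the admin queue geometry that
 * ice_init_sq()/ice_init_rq() require, then bring both rings up.
 */
static enum ice_status example_bring_up_adminq(struct ice_hw *hw)
{
	struct ice_ctl_q_info *cq = &hw->adminq;

	cq->num_sq_entries = 64;		/* example depth */
	cq->num_rq_entries = 64;		/* example depth */
	cq->sq_buf_size = ICE_AQ_MAX_BUF_LEN;	/* largest indirect buffer */
	cq->rq_buf_size = ICE_AQ_MAX_BUF_LEN;

	/* allocates rings and buffers, programs head/tail/len/base
	 * registers, then validates the firmware API version via
	 * ice_aq_get_fw_ver()
	 */
	return ice_init_all_ctrlq(hw);
}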
+ * Prior to calling this function, drivers *MUST* set the following fields + * in the cq->structure: + * - cq->num_rq_entries + * - cq->rq_buf_size + * + * Do *NOT* hold the lock when calling this as the memory allocation routines + * called are not going to be atomic context safe + */ +static enum ice_status ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + enum ice_status ret_code; + + if (cq->rq.count > 0) { + /* queue already initialized */ + ret_code = ICE_ERR_NOT_READY; + goto init_ctrlq_exit; + } + + /* verify input for valid configuration */ + if (!cq->num_rq_entries || !cq->rq_buf_size) { + ret_code = ICE_ERR_CFG; + goto init_ctrlq_exit; + } + + cq->rq.next_to_use = 0; + cq->rq.next_to_clean = 0; + + /* allocate the ring memory */ + ret_code = ice_alloc_ctrlq_rq_ring(hw, cq); + if (ret_code) + goto init_ctrlq_exit; + + /* allocate buffers in the rings */ + ret_code = ice_alloc_rq_bufs(hw, cq); + if (ret_code) + goto init_ctrlq_free_rings; + + /* initialize base registers */ + ret_code = ice_cfg_rq_regs(hw, cq); + if (ret_code) + goto init_ctrlq_free_rings; + + /* success! */ + cq->rq.count = cq->num_rq_entries; + goto init_ctrlq_exit; + +init_ctrlq_free_rings: + ice_free_ctrlq_rq_ring(hw, cq); + +init_ctrlq_exit: + return ret_code; +} + +/** + * ice_shutdown_sq - shutdown the Control ATQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * The main shutdown routine for the Control Transmit Queue + */ +static enum ice_status +ice_shutdown_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + enum ice_status ret_code = 0; + + mutex_lock(&cq->sq_lock); + + if (!cq->sq.count) { + ret_code = ICE_ERR_NOT_READY; + goto shutdown_sq_out; + } + + /* Stop firmware AdminQ processing */ + wr32(hw, cq->sq.head, 0); + wr32(hw, cq->sq.tail, 0); + wr32(hw, cq->sq.len, 0); + wr32(hw, cq->sq.bal, 0); + wr32(hw, cq->sq.bah, 0); + + cq->sq.count = 0; /* to indicate uninitialized queue */ + + /* free ring buffers and the ring itself */ + ice_free_sq_bufs(hw, cq); + ice_free_ctrlq_sq_ring(hw, cq); + +shutdown_sq_out: + mutex_unlock(&cq->sq_lock); + return ret_code; +} + +/** + * ice_aq_ver_check - Check the reported AQ API version. + * @hw: pointer to the hardware structure + * + * Checks if the driver should load on a given AQ API version. + * + * Return: 'true' iff the driver should attempt to load. 'false' otherwise. + */ +static bool ice_aq_ver_check(struct ice_hw *hw) +{ + if (hw->api_maj_ver > EXP_FW_API_VER_MAJOR) { + /* Major API version is newer than expected, don't load */ + dev_warn(ice_hw_to_dev(hw), + "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); + return false; + } else if (hw->api_maj_ver == EXP_FW_API_VER_MAJOR) { + if (hw->api_min_ver > (EXP_FW_API_VER_MINOR + 2)) + dev_info(ice_hw_to_dev(hw), + "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n"); + else if ((hw->api_min_ver + 2) < EXP_FW_API_VER_MINOR) + dev_info(ice_hw_to_dev(hw), + "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n"); + } else { + /* Major API version is older than expected, log a warning */ + dev_info(ice_hw_to_dev(hw), + "The driver for the device detected an older version of the NVM image than expected. 
Please update the NVM image.\n"); + } + return true; +} + +/** + * ice_shutdown_rq - shutdown Control ARQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * The main shutdown routine for the Control Receive Queue + */ +static enum ice_status +ice_shutdown_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + enum ice_status ret_code = 0; + + mutex_lock(&cq->rq_lock); + + if (!cq->rq.count) { + ret_code = ICE_ERR_NOT_READY; + goto shutdown_rq_out; + } + + /* Stop Control Queue processing */ + wr32(hw, cq->rq.head, 0); + wr32(hw, cq->rq.tail, 0); + wr32(hw, cq->rq.len, 0); + wr32(hw, cq->rq.bal, 0); + wr32(hw, cq->rq.bah, 0); + + /* set rq.count to 0 to indicate uninitialized queue */ + cq->rq.count = 0; + + /* free ring buffers and the ring itself */ + ice_free_rq_bufs(hw, cq); + ice_free_ctrlq_rq_ring(hw, cq); + +shutdown_rq_out: + mutex_unlock(&cq->rq_lock); + return ret_code; +} + +/** + * ice_init_check_adminq - Check version for Admin Queue to know if its alive + * @hw: pointer to the hardware structure + */ +static enum ice_status ice_init_check_adminq(struct ice_hw *hw) +{ + struct ice_ctl_q_info *cq = &hw->adminq; + enum ice_status status; + + status = ice_aq_get_fw_ver(hw, NULL); + if (status) + goto init_ctrlq_free_rq; + + if (!ice_aq_ver_check(hw)) { + status = ICE_ERR_FW_API_VER; + goto init_ctrlq_free_rq; + } + + return 0; + +init_ctrlq_free_rq: + if (cq->rq.head) { + ice_shutdown_rq(hw, cq); + mutex_destroy(&cq->rq_lock); + } + if (cq->sq.head) { + ice_shutdown_sq(hw, cq); + mutex_destroy(&cq->sq_lock); + } + return status; +} + +/** + * ice_init_ctrlq - main initialization routine for any control Queue + * @hw: pointer to the hardware structure + * @q_type: specific Control queue type + * + * Prior to calling this function, drivers *MUST* set the following fields + * in the cq->structure: + * - cq->num_sq_entries + * - cq->num_rq_entries + * - cq->rq_buf_size + * - cq->sq_buf_size + * + */ +static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) +{ + struct ice_ctl_q_info *cq; + enum ice_status ret_code; + + switch (q_type) { + case ICE_CTL_Q_ADMIN: + ice_adminq_init_regs(hw); + cq = &hw->adminq; + break; + default: + return ICE_ERR_PARAM; + } + cq->qtype = q_type; + + /* verify input for valid configuration */ + if (!cq->num_rq_entries || !cq->num_sq_entries || + !cq->rq_buf_size || !cq->sq_buf_size) { + return ICE_ERR_CFG; + } + mutex_init(&cq->sq_lock); + mutex_init(&cq->rq_lock); + + /* setup SQ command write back timeout */ + cq->sq_cmd_timeout = ICE_CTL_Q_SQ_CMD_TIMEOUT; + + /* allocate the ATQ */ + ret_code = ice_init_sq(hw, cq); + if (ret_code) + goto init_ctrlq_destroy_locks; + + /* allocate the ARQ */ + ret_code = ice_init_rq(hw, cq); + if (ret_code) + goto init_ctrlq_free_sq; + + /* success! 
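The ice_aq_ver_check() policy above is worth seeing in isolation: a firmware major API version newer than expected blocks the load, while minor-version skew beyond two, or an older major version, only produces log messages. A minimal standalone mirror of that decision, with the expected version passed in rather than taken from the EXP_FW_API_VER_* defines:

#include <stdbool.h>
#include <stdio.h>

/* Return false only when the reported major API version is newer than
 * what the caller was built against; everything else just logs.
 */
static bool aq_ver_ok(unsigned int fw_maj, unsigned int fw_min,
		      unsigned int exp_maj, unsigned int exp_min)
{
	if (fw_maj > exp_maj) {
		printf("firmware API too new: refuse to load\n");
		return false;
	}
	if (fw_maj == exp_maj) {
		if (fw_min > exp_min + 2)
			printf("firmware API much newer: update the driver\n");
		else if (fw_min + 2 < exp_min)
			printf("firmware API much older: update the NVM image\n");
	} else {
		printf("firmware API major older: update the NVM image\n");
	}
	return true;
}

int main(void)
{
	/* e.g. a driver built against API 1.3 */
	printf("%d\n", aq_ver_ok(1, 6, 1, 3));	/* minor skew > 2: warns, loads */
	printf("%d\n", aq_ver_ok(2, 0, 1, 3));	/* newer major: refuses */
	return 0;
}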
*/ + return 0; + +init_ctrlq_free_sq: + ice_shutdown_sq(hw, cq); +init_ctrlq_destroy_locks: + mutex_destroy(&cq->sq_lock); + mutex_destroy(&cq->rq_lock); + return ret_code; +} + +/** + * ice_init_all_ctrlq - main initialization routine for all control queues + * @hw: pointer to the hardware structure + * + * Prior to calling this function, drivers *MUST* set the following fields + * in the cq->structure for all control queues: + * - cq->num_sq_entries + * - cq->num_rq_entries + * - cq->rq_buf_size + * - cq->sq_buf_size + */ +enum ice_status ice_init_all_ctrlq(struct ice_hw *hw) +{ + enum ice_status ret_code; + + /* Init FW admin queue */ + ret_code = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN); + if (ret_code) + return ret_code; + + return ice_init_check_adminq(hw); +} + +/** + * ice_shutdown_ctrlq - shutdown routine for any control queue + * @hw: pointer to the hardware structure + * @q_type: specific Control queue type + */ +static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) +{ + struct ice_ctl_q_info *cq; + + switch (q_type) { + case ICE_CTL_Q_ADMIN: + cq = &hw->adminq; + if (ice_check_sq_alive(hw, cq)) + ice_aq_q_shutdown(hw, true); + break; + default: + return; + } + + if (cq->sq.head) { + ice_shutdown_sq(hw, cq); + mutex_destroy(&cq->sq_lock); + } + if (cq->rq.head) { + ice_shutdown_rq(hw, cq); + mutex_destroy(&cq->rq_lock); + } +} + +/** + * ice_shutdown_all_ctrlq - shutdown routine for all control queues + * @hw: pointer to the hardware structure + */ +void ice_shutdown_all_ctrlq(struct ice_hw *hw) +{ + /* Shutdown FW admin queue */ + ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN); +} + +/** + * ice_clean_sq - cleans Admin send queue (ATQ) + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * returns the number of free desc + */ +static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + struct ice_ctl_q_ring *sq = &cq->sq; + u16 ntc = sq->next_to_clean; + struct ice_sq_cd *details; + struct ice_aq_desc *desc; + + desc = ICE_CTL_Q_DESC(*sq, ntc); + details = ICE_CTL_Q_DETAILS(*sq, ntc); + + while (rd32(hw, cq->sq.head) != ntc) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "ntc %d head %d.\n", ntc, rd32(hw, cq->sq.head)); + memset(desc, 0, sizeof(*desc)); + memset(details, 0, sizeof(*details)); + ntc++; + if (ntc == sq->count) + ntc = 0; + desc = ICE_CTL_Q_DESC(*sq, ntc); + details = ICE_CTL_Q_DETAILS(*sq, ntc); + } + + sq->next_to_clean = ntc; + + return ICE_CTL_Q_DESC_UNUSED(sq); +} + +/** + * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ) + * @hw: pointer to the hw struct + * @cq: pointer to the specific Control queue + * + * Returns true if the firmware has processed all descriptors on the + * admin send queue. Returns false if there are still requests pending. + */ +static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + /* AQ designers suggest use of head for better + * timing reliability than DD bit + */ + return rd32(hw, cq->sq.head) == cq->sq.next_to_use; +} + +/** + * ice_sq_send_cmd - send command to Control Queue (ATQ) + * @hw: pointer to the hw struct + * @cq: pointer to the specific Control queue + * @desc: prefilled descriptor describing the command (non DMA mem) + * @buf: buffer to use for indirect commands (or NULL for direct commands) + * @buf_size: size of buffer for indirect commands (or 0 for direct commands) + * @cd: pointer to command details structure + * + * This is the main send command routine for the ATQ. It runs the q, + * cleans the queue, etc. 
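Before the send routine defined below, a hedged sketch of how it is typically driven: build a descriptor with ice_fill_dflt_direct_cmd_desc() (defined later in this file) and hand it to ice_sq_send_cmd() against hw->adminq. The wrapper function name and the opcode value are placeholders for illustration, not real admin queue opcodes from this driver.

/* Illustration only: send a direct (buffer-less) admin command and
 * report the firmware return code kept in sq_last_status.
 */
static enum ice_status example_send_direct_cmd(struct ice_hw *hw)
{
	struct ice_ctl_q_info *cq = &hw->adminq;
	struct ice_aq_desc desc;
	enum ice_status status;

	ice_fill_dflt_direct_cmd_desc(&desc, 0x0001 /* placeholder opcode */);

	/* NULL buffer + 0 length means a direct command; a NULL cd means
	 * no write-back descriptor is saved for the caller.
	 */
	status = ice_sq_send_cmd(hw, cq, &desc, NULL, 0, NULL);
	if (status)
		ice_debug(hw, ICE_DBG_AQ_MSG, "cmd failed, aq_err %d\n",
			  cq->sq_last_status);
	return status;
}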
+ */ +enum ice_status +ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_aq_desc *desc, void *buf, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_dma_mem *dma_buf = NULL; + struct ice_aq_desc *desc_on_ring; + bool cmd_completed = false; + enum ice_status status = 0; + struct ice_sq_cd *details; + u32 total_delay = 0; + u16 retval = 0; + u32 val = 0; + + /* if reset is in progress return a soft error */ + if (hw->reset_ongoing) + return ICE_ERR_RESET_ONGOING; + mutex_lock(&cq->sq_lock); + + cq->sq_last_status = ICE_AQ_RC_OK; + + if (!cq->sq.count) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Send queue not initialized.\n"); + status = ICE_ERR_AQ_EMPTY; + goto sq_send_command_error; + } + + if ((buf && !buf_size) || (!buf && buf_size)) { + status = ICE_ERR_PARAM; + goto sq_send_command_error; + } + + if (buf) { + if (buf_size > cq->sq_buf_size) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Invalid buffer size for Control Send queue: %d.\n", + buf_size); + status = ICE_ERR_INVAL_SIZE; + goto sq_send_command_error; + } + + desc->flags |= cpu_to_le16(ICE_AQ_FLAG_BUF); + if (buf_size > ICE_AQ_LG_BUF) + desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB); + } + + val = rd32(hw, cq->sq.head); + if (val >= cq->num_sq_entries) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "head overrun at %d in the Control Send Queue ring\n", + val); + status = ICE_ERR_AQ_EMPTY; + goto sq_send_command_error; + } + + details = ICE_CTL_Q_DETAILS(cq->sq, cq->sq.next_to_use); + if (cd) + memcpy(details, cd, sizeof(*details)); + else + memset(details, 0, sizeof(*details)); + + /* Call clean and check queue available function to reclaim the + * descriptors that were processed by FW/MBX; the function returns the + * number of desc available. The clean function called here could be + * called in a separate thread in case of asynchronous completions. 
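The count ice_clean_sq() returns comes from the ICE_CTL_Q_DESC_UNUSED() macro (see ice_controlq.h further down): one slot is always left empty so a full ring can be told apart from an empty one, and the free count is computed modulo the ring size from next_to_use and next_to_clean. A small standalone check of that arithmetic with made-up ring indices:

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as ICE_CTL_Q_DESC_UNUSED(): free slots in a ring of
 * 'count' entries where next_to_use chases next_to_clean and one slot
 * stays reserved.
 */
static uint16_t ring_unused(uint16_t count, uint16_t next_to_clean,
			    uint16_t next_to_use)
{
	return (uint16_t)(((next_to_clean > next_to_use) ? 0 : count) +
			  next_to_clean - next_to_use - 1);
}

int main(void)
{
	/* empty ring: everything but the reserved slot is free */
	printf("%u\n", ring_unused(64, 0, 0));	/* 63 */
	/* 10 descriptors posted and not yet cleaned */
	printf("%u\n", ring_unused(64, 0, 10));	/* 53 */
	/* wrapped case: next_to_clean is ahead of next_to_use */
	printf("%u\n", ring_unused(64, 60, 5));	/* 54 */
	return 0;
}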
+ */ + if (ice_clean_sq(hw, cq) == 0) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Error: Control Send Queue is full.\n"); + status = ICE_ERR_AQ_FULL; + goto sq_send_command_error; + } + + /* initialize the temp desc pointer with the right desc */ + desc_on_ring = ICE_CTL_Q_DESC(cq->sq, cq->sq.next_to_use); + + /* if the desc is available copy the temp desc to the right place */ + memcpy(desc_on_ring, desc, sizeof(*desc_on_ring)); + + /* if buf is not NULL assume indirect command */ + if (buf) { + dma_buf = &cq->sq.r.sq_bi[cq->sq.next_to_use]; + /* copy the user buf into the respective DMA buf */ + memcpy(dma_buf->va, buf, buf_size); + desc_on_ring->datalen = cpu_to_le16(buf_size); + + /* Update the address values in the desc with the pa value + * for respective buffer + */ + desc_on_ring->params.generic.addr_high = + cpu_to_le32(upper_32_bits(dma_buf->pa)); + desc_on_ring->params.generic.addr_low = + cpu_to_le32(lower_32_bits(dma_buf->pa)); + } + + /* Debug desc and buffer */ + ice_debug(hw, ICE_DBG_AQ_MSG, + "ATQ: Control Send queue desc and buffer:\n"); + + ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc_on_ring, buf, buf_size); + + (cq->sq.next_to_use)++; + if (cq->sq.next_to_use == cq->sq.count) + cq->sq.next_to_use = 0; + wr32(hw, cq->sq.tail, cq->sq.next_to_use); + + do { + if (ice_sq_done(hw, cq)) + break; + + udelay(ICE_CTL_Q_SQ_CMD_USEC); + total_delay++; + } while (total_delay < cq->sq_cmd_timeout); + + /* if ready, copy the desc back to temp */ + if (ice_sq_done(hw, cq)) { + memcpy(desc, desc_on_ring, sizeof(*desc)); + if (buf) { + /* get returned length to copy */ + u16 copy_size = le16_to_cpu(desc->datalen); + + if (copy_size > buf_size) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Return len %d > than buf len %d\n", + copy_size, buf_size); + status = ICE_ERR_AQ_ERROR; + } else { + memcpy(buf, dma_buf->va, copy_size); + } + } + retval = le16_to_cpu(desc->retval); + if (retval) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Send Queue command completed with error 0x%x\n", + retval); + + /* strip off FW internal code */ + retval &= 0xff; + } + cmd_completed = true; + if (!status && retval != ICE_AQ_RC_OK) + status = ICE_ERR_AQ_ERROR; + cq->sq_last_status = (enum ice_aq_err)retval; + } + + ice_debug(hw, ICE_DBG_AQ_MSG, + "ATQ: desc and buffer writeback:\n"); + + ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, buf, buf_size); + + /* save writeback AQ if requested */ + if (details->wb_desc) + memcpy(details->wb_desc, desc_on_ring, + sizeof(*details->wb_desc)); + + /* update the error if time out occurred */ + if (!cmd_completed) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Send Queue Writeback timeout.\n"); + status = ICE_ERR_AQ_TIMEOUT; + } + +sq_send_command_error: + mutex_unlock(&cq->sq_lock); + return status; +} + +/** + * ice_fill_dflt_direct_cmd_desc - AQ descriptor helper function + * @desc: pointer to the temp descriptor (non DMA mem) + * @opcode: the opcode can be used to decide which flags to turn off or on + * + * Fill the desc with default values + */ +void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode) +{ + /* zero out the desc */ + memset(desc, 0, sizeof(*desc)); + desc->opcode = cpu_to_le16(opcode); + desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI); +} + +/** + * ice_clean_rq_elem + * @hw: pointer to the hw struct + * @cq: pointer to the specific Control queue + * @e: event info from the receive descriptor, includes any buffers + * @pending: number of events that could be left to process + * + * This function cleans one Admin Receive Queue element and returns + * the 
contents through e. It can also return how many events are + * left to process through 'pending'. + */ +enum ice_status +ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_rq_event_info *e, u16 *pending) +{ + u16 ntc = cq->rq.next_to_clean; + enum ice_status ret_code = 0; + struct ice_aq_desc *desc; + struct ice_dma_mem *bi; + u16 desc_idx; + u16 datalen; + u16 flags; + u16 ntu; + + /* pre-clean the event info */ + memset(&e->desc, 0, sizeof(e->desc)); + + /* take the lock before we start messing with the ring */ + mutex_lock(&cq->rq_lock); + + if (!cq->rq.count) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Receive queue not initialized.\n"); + ret_code = ICE_ERR_AQ_EMPTY; + goto clean_rq_elem_err; + } + + /* set next_to_use to head */ + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); + + if (ntu == ntc) { + /* nothing to do - shouldn't need to update ring's values */ + ret_code = ICE_ERR_AQ_NO_WORK; + goto clean_rq_elem_out; + } + + /* now clean the next descriptor */ + desc = ICE_CTL_Q_DESC(cq->rq, ntc); + desc_idx = ntc; + + cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); + flags = le16_to_cpu(desc->flags); + if (flags & ICE_AQ_FLAG_ERR) { + ret_code = ICE_ERR_AQ_ERROR; + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Receive Queue Event received with error 0x%x\n", + cq->rq_last_status); + } + memcpy(&e->desc, desc, sizeof(e->desc)); + datalen = le16_to_cpu(desc->datalen); + e->msg_len = min(datalen, e->buf_len); + if (e->msg_buf && e->msg_len) + memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len); + + ice_debug(hw, ICE_DBG_AQ_MSG, "ARQ: desc and buffer:\n"); + + ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, e->msg_buf, + cq->rq_buf_size); + + /* Restore the original datalen and buffer address in the desc, + * FW updates datalen to indicate the event message size + */ + bi = &cq->rq.r.rq_bi[ntc]; + memset(desc, 0, sizeof(*desc)); + + desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF); + if (cq->rq_buf_size > ICE_AQ_LG_BUF) + desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB); + desc->datalen = cpu_to_le16(bi->size); + desc->params.generic.addr_high = cpu_to_le32(upper_32_bits(bi->pa)); + desc->params.generic.addr_low = cpu_to_le32(lower_32_bits(bi->pa)); + + /* set tail = the last cleaned desc index. */ + wr32(hw, cq->rq.tail, ntc); + /* ntc is updated to tail + 1 */ + ntc++; + if (ntc == cq->num_rq_entries) + ntc = 0; + cq->rq.next_to_clean = ntc; + cq->rq.next_to_use = ntu; + +clean_rq_elem_out: + /* Set pending if needed, unlock and return */ + if (pending) { + /* re-read HW head to calculate actual pending messages */ + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); + *pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc)); + } +clean_rq_elem_err: + mutex_unlock(&cq->rq_lock); + + return ret_code; +} diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h new file mode 100644 index 000000000..7960f0e4d --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_CONTROLQ_H_ +#define _ICE_CONTROLQ_H_ + +#include "ice_adminq_cmd.h" + +/* Maximum buffer lengths for all control queue types */ +#define ICE_AQ_MAX_BUF_LEN 4096 + +#define ICE_CTL_Q_DESC(R, i) \ + (&(((struct ice_aq_desc *)((R).desc_buf.va))[i])) + +#define ICE_CTL_Q_DESC_UNUSED(R) \ + (u16)((((R)->next_to_clean > (R)->next_to_use) ? 
0 : (R)->count) + \ + (R)->next_to_clean - (R)->next_to_use - 1) + +/* Defines that help manage the driver vs FW API checks. + * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage. + * + */ +#define EXP_FW_API_VER_BRANCH 0x00 +#define EXP_FW_API_VER_MAJOR 0x00 +#define EXP_FW_API_VER_MINOR 0x01 + +/* Different control queue types: These are mainly for SW consumption. */ +enum ice_ctl_q { + ICE_CTL_Q_UNKNOWN = 0, + ICE_CTL_Q_ADMIN, +}; + +/* Control Queue timeout settings - max delay 1s */ +#define ICE_CTL_Q_SQ_CMD_TIMEOUT 10000 /* Count 10000 times */ +#define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ + +struct ice_ctl_q_ring { + void *dma_head; /* Virtual address to dma head */ + struct ice_dma_mem desc_buf; /* descriptor ring memory */ + void *cmd_buf; /* command buffer memory */ + + union { + struct ice_dma_mem *sq_bi; + struct ice_dma_mem *rq_bi; + } r; + + u16 count; /* Number of descriptors */ + + /* used for interrupt processing */ + u16 next_to_use; + u16 next_to_clean; + + /* used for queue tracking */ + u32 head; + u32 tail; + u32 len; + u32 bah; + u32 bal; + u32 len_mask; + u32 len_ena_mask; + u32 head_mask; +}; + +/* sq transaction details */ +struct ice_sq_cd { + struct ice_aq_desc *wb_desc; +}; + +#define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i])) + +/* rq event information */ +struct ice_rq_event_info { + struct ice_aq_desc desc; + u16 msg_len; + u16 buf_len; + u8 *msg_buf; +}; + +/* Control Queue information */ +struct ice_ctl_q_info { + enum ice_ctl_q qtype; + struct ice_ctl_q_ring rq; /* receive queue */ + struct ice_ctl_q_ring sq; /* send queue */ + u32 sq_cmd_timeout; /* send queue cmd write back timeout */ + u16 num_rq_entries; /* receive queue depth */ + u16 num_sq_entries; /* send queue depth */ + u16 rq_buf_size; /* receive queue buffer size */ + u16 sq_buf_size; /* send queue buffer size */ + struct mutex sq_lock; /* Send queue lock */ + struct mutex rq_lock; /* Receive queue lock */ + enum ice_aq_err sq_last_status; /* last status on send queue */ + enum ice_aq_err rq_last_status; /* last status on receive queue */ +}; + +#endif /* _ICE_CONTROLQ_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h new file mode 100644 index 000000000..0e14d7215 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_DEVIDS_H_ +#define _ICE_DEVIDS_H_ + +/* Device IDs */ +/* Intel(R) Ethernet Controller C810 for backplane */ +#define ICE_DEV_ID_C810_BACKPLANE 0x1591 +/* Intel(R) Ethernet Controller C810 for QSFP */ +#define ICE_DEV_ID_C810_QSFP 0x1592 +/* Intel(R) Ethernet Controller C810 for SFP */ +#define ICE_DEV_ID_C810_SFP 0x1593 +/* Intel(R) Ethernet Controller C810/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_C810_10G_BASE_T 0x1594 +/* Intel(R) Ethernet Controller C810 1GbE */ +#define ICE_DEV_ID_C810_SGMII 0x1595 + +#endif /* _ICE_DEVIDS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c new file mode 100644 index 000000000..627abef82 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -0,0 +1,975 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. 
*/ + +/* ethtool support for ice */ + +#include "ice.h" + +struct ice_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define ICE_STAT(_type, _name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = FIELD_SIZEOF(_type, _stat), \ + .stat_offset = offsetof(_type, _stat) \ +} + +#define ICE_VSI_STAT(_name, _stat) \ + ICE_STAT(struct ice_vsi, _name, _stat) +#define ICE_PF_STAT(_name, _stat) \ + ICE_STAT(struct ice_pf, _name, _stat) + +static int ice_q_stats_len(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) * + (sizeof(struct ice_q_stats) / sizeof(u64))); +} + +#define ICE_PF_STATS_LEN ARRAY_SIZE(ice_gstrings_pf_stats) +#define ICE_VSI_STATS_LEN ARRAY_SIZE(ice_gstrings_vsi_stats) + +#define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_VSI_STATS_LEN + \ + ice_q_stats_len(n)) + +static const struct ice_stats ice_gstrings_vsi_stats[] = { + ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast), + ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast), + ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast), + ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast), + ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast), + ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast), + ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes), + ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes), + ICE_VSI_STAT("rx_discards", eth_stats.rx_discards), + ICE_VSI_STAT("tx_errors", eth_stats.tx_errors), + ICE_VSI_STAT("tx_linearize", tx_linearize), + ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), + ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed), + ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), +}; + +/* These PF_STATs might look like duplicates of some NETDEV_STATs, + * but they aren't. This device is capable of supporting multiple + * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual + * netdevs whereas the PF_STATs are for the physical function that's + * hosting these netdevs. + * + * The PF_STATs are appended to the netdev stats only when ethtool -S + * is queried on the base PF netdev. 
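The ICE_STAT()/ICE_VSI_STAT()/ICE_PF_STAT() tables above are built purely from offsetof() and the field size, so one generic loop (see ice_get_ethtool_stats() below) can service every entry whether it is a u64 or a u32. A standalone miniature of the same technique, with a made-up stats struct:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Toy version of struct ice_stats: a name plus the size and offset of
 * the field inside the structure that holds the counters.
 */
struct stat_desc {
	const char *name;
	size_t size;
	size_t offset;
};

struct toy_stats {
	uint64_t tx_bytes;
	uint32_t rx_discards;
};

#define TOY_STAT(field) \
	{ #field, sizeof(((struct toy_stats *)0)->field), \
	  offsetof(struct toy_stats, field) }

static const struct stat_desc toy_table[] = {
	TOY_STAT(tx_bytes),
	TOY_STAT(rx_discards),
};

int main(void)
{
	struct toy_stats s = { .tx_bytes = 123456, .rx_discards = 7 };
	size_t i;

	for (i = 0; i < sizeof(toy_table) / sizeof(toy_table[0]); i++) {
		const char *p = (const char *)&s + toy_table[i].offset;
		uint64_t val = (toy_table[i].size == sizeof(uint64_t)) ?
			       *(const uint64_t *)p : *(const uint32_t *)p;

		printf("%s = %llu\n", toy_table[i].name,
		       (unsigned long long)val);
	}
	return 0;
}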
+ */ +static struct ice_stats ice_gstrings_pf_stats[] = { + ICE_PF_STAT("tx_bytes", stats.eth.tx_bytes), + ICE_PF_STAT("rx_bytes", stats.eth.rx_bytes), + ICE_PF_STAT("tx_unicast", stats.eth.tx_unicast), + ICE_PF_STAT("rx_unicast", stats.eth.rx_unicast), + ICE_PF_STAT("tx_multicast", stats.eth.tx_multicast), + ICE_PF_STAT("rx_multicast", stats.eth.rx_multicast), + ICE_PF_STAT("tx_broadcast", stats.eth.tx_broadcast), + ICE_PF_STAT("rx_broadcast", stats.eth.rx_broadcast), + ICE_PF_STAT("tx_errors", stats.eth.tx_errors), + ICE_PF_STAT("tx_size_64", stats.tx_size_64), + ICE_PF_STAT("rx_size_64", stats.rx_size_64), + ICE_PF_STAT("tx_size_127", stats.tx_size_127), + ICE_PF_STAT("rx_size_127", stats.rx_size_127), + ICE_PF_STAT("tx_size_255", stats.tx_size_255), + ICE_PF_STAT("rx_size_255", stats.rx_size_255), + ICE_PF_STAT("tx_size_511", stats.tx_size_511), + ICE_PF_STAT("rx_size_511", stats.rx_size_511), + ICE_PF_STAT("tx_size_1023", stats.tx_size_1023), + ICE_PF_STAT("rx_size_1023", stats.rx_size_1023), + ICE_PF_STAT("tx_size_1522", stats.tx_size_1522), + ICE_PF_STAT("rx_size_1522", stats.rx_size_1522), + ICE_PF_STAT("tx_size_big", stats.tx_size_big), + ICE_PF_STAT("rx_size_big", stats.rx_size_big), + ICE_PF_STAT("link_xon_tx", stats.link_xon_tx), + ICE_PF_STAT("link_xon_rx", stats.link_xon_rx), + ICE_PF_STAT("link_xoff_tx", stats.link_xoff_tx), + ICE_PF_STAT("link_xoff_rx", stats.link_xoff_rx), + ICE_PF_STAT("tx_dropped_link_down", stats.tx_dropped_link_down), + ICE_PF_STAT("rx_undersize", stats.rx_undersize), + ICE_PF_STAT("rx_fragments", stats.rx_fragments), + ICE_PF_STAT("rx_oversize", stats.rx_oversize), + ICE_PF_STAT("rx_jabber", stats.rx_jabber), + ICE_PF_STAT("rx_csum_bad", hw_csum_rx_error), + ICE_PF_STAT("rx_length_errors", stats.rx_len_errors), + ICE_PF_STAT("rx_dropped", stats.eth.rx_discards), + ICE_PF_STAT("rx_crc_errors", stats.crc_errors), + ICE_PF_STAT("illegal_bytes", stats.illegal_bytes), + ICE_PF_STAT("mac_local_faults", stats.mac_local_faults), + ICE_PF_STAT("mac_remote_faults", stats.mac_remote_faults), +}; + +static u32 ice_regs_dump_list[] = { + PFGEN_STATE, + PRTGEN_STATUS, + QRX_CTRL(0), + QINT_TQCTL(0), + QINT_RQCTL(0), + PFINT_OICR_ENA, + QRX_ITR(0), +}; + +/** + * ice_nvm_version_str - format the NVM version strings + * @hw: ptr to the hardware info + */ +static char *ice_nvm_version_str(struct ice_hw *hw) +{ + static char buf[ICE_ETHTOOL_FWVER_LEN]; + u8 ver, patch; + u32 full_ver; + u16 build; + + full_ver = hw->nvm.oem_ver; + ver = (u8)((full_ver & ICE_OEM_VER_MASK) >> ICE_OEM_VER_SHIFT); + build = (u16)((full_ver & ICE_OEM_VER_BUILD_MASK) >> + ICE_OEM_VER_BUILD_SHIFT); + patch = (u8)(full_ver & ICE_OEM_VER_PATCH_MASK); + + snprintf(buf, sizeof(buf), "%x.%02x 0x%x %d.%d.%d", + (hw->nvm.ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT, + (hw->nvm.ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT, + hw->nvm.eetrack, ver, build, patch); + + return buf; +} + +static void +ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + + strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, ice_drv_ver, sizeof(drvinfo->version)); + strlcpy(drvinfo->fw_version, ice_nvm_version_str(&pf->hw), + sizeof(drvinfo->fw_version)); + strlcpy(drvinfo->bus_info, pci_name(pf->pdev), + sizeof(drvinfo->bus_info)); +} + +static int ice_get_regs_len(struct net_device __always_unused *netdev) +{ + return 
sizeof(ice_regs_dump_list); +} + +static void +ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + struct ice_hw *hw = &pf->hw; + u32 *regs_buf = (u32 *)p; + int i; + + regs->version = 1; + + for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list); ++i) + regs_buf[i] = rd32(hw, ice_regs_dump_list[i]); +} + +static u32 ice_get_msglevel(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + +#ifndef CONFIG_DYNAMIC_DEBUG + if (pf->hw.debug_mask) + netdev_info(netdev, "hw debug_mask: 0x%llX\n", + pf->hw.debug_mask); +#endif /* !CONFIG_DYNAMIC_DEBUG */ + + return pf->msg_enable; +} + +static void ice_set_msglevel(struct net_device *netdev, u32 data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + +#ifndef CONFIG_DYNAMIC_DEBUG + if (ICE_DBG_USER & data) + pf->hw.debug_mask = data; + else + pf->msg_enable = data; +#else + pf->msg_enable = data; +#endif /* !CONFIG_DYNAMIC_DEBUG */ +} + +static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + char *p = (char *)data; + unsigned int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ICE_VSI_STATS_LEN; i++) { + snprintf(p, ETH_GSTRING_LEN, "%s", + ice_gstrings_vsi_stats[i].stat_string); + p += ETH_GSTRING_LEN; + } + + ice_for_each_alloc_txq(vsi, i) { + snprintf(p, ETH_GSTRING_LEN, + "tx-queue-%u.tx_packets", i); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_bytes", i); + p += ETH_GSTRING_LEN; + } + + ice_for_each_alloc_rxq(vsi, i) { + snprintf(p, ETH_GSTRING_LEN, + "rx-queue-%u.rx_packets", i); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_bytes", i); + p += ETH_GSTRING_LEN; + } + + if (vsi->type != ICE_VSI_PF) + return; + + for (i = 0; i < ICE_PF_STATS_LEN; i++) { + snprintf(p, ETH_GSTRING_LEN, "port.%s", + ice_gstrings_pf_stats[i].stat_string); + p += ETH_GSTRING_LEN; + } + + break; + default: + break; + } +} + +static int ice_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + /* The number (and order) of strings reported *must* remain + * constant for a given netdevice. This function must not + * report a different number based on run time parameters + * (such as the number of queues in use, or the setting of + * a private ethtool flag). This is due to the nature of the + * ethtool stats API. + * + * User space programs such as ethtool must make 3 separate + * ioctl requests, one for size, one for the strings, and + * finally one for the stats. Since these cross into + * user space, changes to the number or size could result in + * undefined memory access or incorrect string<->value + * correlations for statistics. + * + * Even if it appears to be safe, changes to the size or + * order of strings will suffer from race conditions and are + * not safe. 
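ice_get_strings() above fills a flat buffer in which every name occupies exactly ETH_GSTRING_LEN bytes, advancing the write pointer by that stride after each snprintf(); the count returned below has to match that layout exactly, which is why it is derived from allocation-time quantities rather than runtime state. A standalone sketch of the fixed-stride string packing, with a stand-in value for ETH_GSTRING_LEN:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define GSTRING_LEN 32	/* stand-in for ETH_GSTRING_LEN */

/* Pack per-queue stat names into fixed-width slots, the layout user
 * space expects when it sizes its buffer from the reported count.
 */
static void fill_queue_strings(char *buf, unsigned int num_txq)
{
	char *p = buf;
	unsigned int i;

	for (i = 0; i < num_txq; i++) {
		snprintf(p, GSTRING_LEN, "tx-queue-%u.tx_packets", i);
		p += GSTRING_LEN;
		snprintf(p, GSTRING_LEN, "tx-queue-%u.tx_bytes", i);
		p += GSTRING_LEN;
	}
}

int main(void)
{
	unsigned int num_txq = 2, nstrings = num_txq * 2, i;
	char *buf = calloc(nstrings, GSTRING_LEN);

	if (!buf)
		return 1;
	fill_queue_strings(buf, num_txq);
	for (i = 0; i < nstrings; i++)
		printf("%s\n", buf + (size_t)i * GSTRING_LEN);
	free(buf);
	return 0;
}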
+ */ + return ICE_ALL_STATS_LEN(netdev); + default: + return -EOPNOTSUPP; + } +} + +static void +ice_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, u64 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_ring *ring; + unsigned int j = 0; + int i = 0; + char *p; + + for (j = 0; j < ICE_VSI_STATS_LEN; j++) { + p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset; + data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + + /* populate per queue stats */ + rcu_read_lock(); + + ice_for_each_alloc_txq(vsi, j) { + ring = READ_ONCE(vsi->tx_rings[j]); + if (ring) { + data[i++] = ring->stats.pkts; + data[i++] = ring->stats.bytes; + } else { + data[i++] = 0; + data[i++] = 0; + } + } + + ice_for_each_alloc_rxq(vsi, j) { + ring = READ_ONCE(vsi->rx_rings[j]); + if (ring) { + data[i++] = ring->stats.pkts; + data[i++] = ring->stats.bytes; + } else { + data[i++] = 0; + data[i++] = 0; + } + } + + rcu_read_unlock(); + + if (vsi->type != ICE_VSI_PF) + return; + + for (j = 0; j < ICE_PF_STATS_LEN; j++) { + p = (char *)pf + ice_gstrings_pf_stats[j].stat_offset; + data[i++] = (ice_gstrings_pf_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } +} + +static int +ice_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *ks) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_link_status *hw_link_info; + struct ice_vsi *vsi = np->vsi; + bool link_up; + + hw_link_info = &vsi->port_info->phy.link_info; + link_up = hw_link_info->link_info & ICE_AQ_LINK_UP; + + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); + + /* set speed and duplex */ + if (link_up) { + switch (hw_link_info->link_speed) { + case ICE_AQ_LINK_SPEED_100MB: + ks->base.speed = SPEED_100; + break; + case ICE_AQ_LINK_SPEED_2500MB: + ks->base.speed = SPEED_2500; + break; + case ICE_AQ_LINK_SPEED_5GB: + ks->base.speed = SPEED_5000; + break; + case ICE_AQ_LINK_SPEED_10GB: + ks->base.speed = SPEED_10000; + break; + case ICE_AQ_LINK_SPEED_25GB: + ks->base.speed = SPEED_25000; + break; + case ICE_AQ_LINK_SPEED_40GB: + ks->base.speed = SPEED_40000; + break; + default: + ks->base.speed = SPEED_UNKNOWN; + break; + } + + ks->base.duplex = DUPLEX_FULL; + } else { + ks->base.speed = SPEED_UNKNOWN; + ks->base.duplex = DUPLEX_UNKNOWN; + } + + /* set autoneg settings */ + ks->base.autoneg = ((hw_link_info->an_info & ICE_AQ_AN_COMPLETED) ? 
+ AUTONEG_ENABLE : AUTONEG_DISABLE); + + /* set media type settings */ + switch (vsi->port_info->phy.media_type) { + case ICE_MEDIA_FIBER: + ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE); + ks->base.port = PORT_FIBRE; + break; + case ICE_MEDIA_BASET: + ethtool_link_ksettings_add_link_mode(ks, supported, TP); + ethtool_link_ksettings_add_link_mode(ks, advertising, TP); + ks->base.port = PORT_TP; + break; + case ICE_MEDIA_BACKPLANE: + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, Backplane); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + Backplane); + ks->base.port = PORT_NONE; + break; + case ICE_MEDIA_DA: + ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE); + ks->base.port = PORT_DA; + break; + default: + ks->base.port = PORT_OTHER; + break; + } + + /* flow control is symmetric and always supported */ + ethtool_link_ksettings_add_link_mode(ks, supported, Pause); + + switch (vsi->port_info->fc.req_mode) { + case ICE_FC_FULL: + ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); + break; + case ICE_FC_TX_PAUSE: + ethtool_link_ksettings_add_link_mode(ks, advertising, + Asym_Pause); + break; + case ICE_FC_RX_PAUSE: + ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); + ethtool_link_ksettings_add_link_mode(ks, advertising, + Asym_Pause); + break; + case ICE_FC_PFC: + default: + ethtool_link_ksettings_del_link_mode(ks, advertising, Pause); + ethtool_link_ksettings_del_link_mode(ks, advertising, + Asym_Pause); + break; + } + + return 0; +} + +/** + * ice_get_rxnfc - command to get RX flow classification rules + * @netdev: network interface device structure + * @cmd: ethtool rxnfc command + * @rule_locs: buffer to rturn Rx flow classification rules + * + * Returns Success if the command is supported. 
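The flow-control switch just above follows the usual IEEE 802.3 pause advertisement convention: symmetric pause advertises Pause, Tx-only pause advertises Asym_Pause alone, and Rx-only pause advertises both bits. A standalone restatement of that mapping; the enum and bit names here are local stand-ins, not the kernel's link-mode definitions.

#include <stdio.h>

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

#define ADV_PAUSE	0x1	/* stand-in for the Pause link mode bit */
#define ADV_ASYM_PAUSE	0x2	/* stand-in for the Asym_Pause link mode bit */

/* Same mapping as the requested-mode switch in ice_get_link_ksettings() */
static unsigned int fc_to_advertised(enum fc_mode mode)
{
	switch (mode) {
	case FC_FULL:
		return ADV_PAUSE;
	case FC_TX_PAUSE:
		return ADV_ASYM_PAUSE;
	case FC_RX_PAUSE:
		return ADV_PAUSE | ADV_ASYM_PAUSE;
	default:
		return 0;	/* no pause advertised */
	}
}

int main(void)
{
	printf("full: 0x%x\n", fc_to_advertised(FC_FULL));	/* 0x1 */
	printf("tx:   0x%x\n", fc_to_advertised(FC_TX_PAUSE));	/* 0x2 */
	printf("rx:   0x%x\n", fc_to_advertised(FC_RX_PAUSE));	/* 0x3 */
	return 0;
}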
+ */ +static int ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + u32 __always_unused *rule_locs) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = vsi->rss_size; + ret = 0; + break; + default: + break; + } + + return ret; +} + +static void +ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + ring->rx_max_pending = ICE_MAX_NUM_DESC; + ring->tx_max_pending = ICE_MAX_NUM_DESC; + ring->rx_pending = vsi->rx_rings[0]->count; + ring->tx_pending = vsi->tx_rings[0]->count; + + /* Rx mini and jumbo rings are not supported */ + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} + +static int +ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) +{ + struct ice_ring *tx_rings = NULL, *rx_rings = NULL; + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + int i, timeout = 50, err = 0; + u32 new_rx_cnt, new_tx_cnt; + + if (ring->tx_pending > ICE_MAX_NUM_DESC || + ring->tx_pending < ICE_MIN_NUM_DESC || + ring->rx_pending > ICE_MAX_NUM_DESC || + ring->rx_pending < ICE_MIN_NUM_DESC) { + netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n", + ring->tx_pending, ring->rx_pending, + ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC, + ICE_REQ_DESC_MULTIPLE); + return -EINVAL; + } + + new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE); + if (new_tx_cnt != ring->tx_pending) + netdev_info(netdev, + "Requested Tx descriptor count rounded up to %d\n", + new_tx_cnt); + new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE); + if (new_rx_cnt != ring->rx_pending) + netdev_info(netdev, + "Requested Rx descriptor count rounded up to %d\n", + new_rx_cnt); + + /* if nothing to do return success */ + if (new_tx_cnt == vsi->tx_rings[0]->count && + new_rx_cnt == vsi->rx_rings[0]->count) { + netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n"); + return 0; + } + + while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + + /* set for the next time the netdev is started */ + if (!netif_running(vsi->netdev)) { + for (i = 0; i < vsi->alloc_txq; i++) + vsi->tx_rings[i]->count = new_tx_cnt; + for (i = 0; i < vsi->alloc_rxq; i++) + vsi->rx_rings[i]->count = new_rx_cnt; + netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n"); + goto done; + } + + if (new_tx_cnt == vsi->tx_rings[0]->count) + goto process_rx; + + /* alloc updated Tx resources */ + netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n", + vsi->tx_rings[0]->count, new_tx_cnt); + + tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, + sizeof(struct ice_ring), GFP_KERNEL); + if (!tx_rings) { + err = -ENOMEM; + goto done; + } + + for (i = 0; i < vsi->alloc_txq; i++) { + /* clone ring and setup updated count */ + tx_rings[i] = *vsi->tx_rings[i]; + tx_rings[i].count = new_tx_cnt; + tx_rings[i].desc = NULL; + tx_rings[i].tx_buf = NULL; + err = ice_setup_tx_ring(&tx_rings[i]); + if (err) { + while (i) { + i--; + ice_clean_tx_ring(&tx_rings[i]); + } + devm_kfree(&pf->pdev->dev, tx_rings); + goto done; + } + } + +process_rx: + if (new_rx_cnt == 
vsi->rx_rings[0]->count) + goto process_link; + + /* alloc updated Rx resources */ + netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n", + vsi->rx_rings[0]->count, new_rx_cnt); + + rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, + sizeof(struct ice_ring), GFP_KERNEL); + if (!rx_rings) { + err = -ENOMEM; + goto done; + } + + for (i = 0; i < vsi->alloc_rxq; i++) { + /* clone ring and setup updated count */ + rx_rings[i] = *vsi->rx_rings[i]; + rx_rings[i].count = new_rx_cnt; + rx_rings[i].desc = NULL; + rx_rings[i].rx_buf = NULL; + /* this is to allow wr32 to have something to write to + * during early allocation of Rx buffers + */ + rx_rings[i].tail = vsi->back->hw.hw_addr + PRTGEN_STATUS; + + err = ice_setup_rx_ring(&rx_rings[i]); + if (err) + goto rx_unwind; + + /* allocate Rx buffers */ + err = ice_alloc_rx_bufs(&rx_rings[i], + ICE_DESC_UNUSED(&rx_rings[i])); +rx_unwind: + if (err) { + while (i) { + i--; + ice_free_rx_ring(&rx_rings[i]); + } + devm_kfree(&pf->pdev->dev, rx_rings); + err = -ENOMEM; + goto free_tx; + } + } + +process_link: + /* Bring interface down, copy in the new ring info, then restore the + * interface. if VSI is up, bring it down and then back up + */ + if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + ice_down(vsi); + + if (tx_rings) { + for (i = 0; i < vsi->alloc_txq; i++) { + ice_free_tx_ring(vsi->tx_rings[i]); + *vsi->tx_rings[i] = tx_rings[i]; + } + devm_kfree(&pf->pdev->dev, tx_rings); + } + + if (rx_rings) { + for (i = 0; i < vsi->alloc_rxq; i++) { + ice_free_rx_ring(vsi->rx_rings[i]); + /* copy the real tail offset */ + rx_rings[i].tail = vsi->rx_rings[i]->tail; + /* this is to fake out the allocation routine + * into thinking it has to realloc everything + * but the recycling logic will let us re-use + * the buffers allocated above + */ + rx_rings[i].next_to_use = 0; + rx_rings[i].next_to_clean = 0; + rx_rings[i].next_to_alloc = 0; + *vsi->rx_rings[i] = rx_rings[i]; + } + devm_kfree(&pf->pdev->dev, rx_rings); + } + + ice_up(vsi); + } + goto done; + +free_tx: + /* error cleanup if the Rx allocations failed after getting Tx */ + if (tx_rings) { + for (i = 0; i < vsi->alloc_txq; i++) + ice_free_tx_ring(&tx_rings[i]); + devm_kfree(&pf->pdev->dev, tx_rings); + } + +done: + clear_bit(__ICE_CFG_BUSY, pf->state); + return err; +} + +static int ice_nway_reset(struct net_device *netdev) +{ + /* restart autonegotiation */ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_link_status *hw_link_info; + struct ice_vsi *vsi = np->vsi; + struct ice_port_info *pi; + enum ice_status status; + bool link_up; + + pi = vsi->port_info; + hw_link_info = &pi->phy.link_info; + link_up = hw_link_info->link_info & ICE_AQ_LINK_UP; + + status = ice_aq_set_link_restart_an(pi, link_up, NULL); + if (status) { + netdev_info(netdev, "link restart failed, err %d aq_err %d\n", + status, pi->hw->adminq.sq_last_status); + return -EIO; + } + + return 0; +} + +/** + * ice_get_pauseparam - Get Flow Control status + * @netdev: network interface device structure + * @pause: ethernet pause (flow control) parameters + */ +static void +ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_port_info *pi; + + pi = np->vsi->port_info; + pause->autoneg = + ((pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) ? 
+ AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+ if (pi->fc.current_mode == ICE_FC_RX_PAUSE) {
+ pause->rx_pause = 1;
+ } else if (pi->fc.current_mode == ICE_FC_TX_PAUSE) {
+ pause->tx_pause = 1;
+ } else if (pi->fc.current_mode == ICE_FC_FULL) {
+ pause->rx_pause = 1;
+ pause->tx_pause = 1;
+ }
+}
+
+/**
+ * ice_set_pauseparam - Set Flow Control parameter
+ * @netdev: network interface device structure
+ * @pause: requested Tx/Rx flow control parameters
+ */
+static int
+ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_link_status *hw_link_info;
+ struct ice_pf *pf = np->vsi->back;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_port_info *pi;
+ enum ice_status status;
+ u8 aq_failures;
+ bool link_up;
+ int err = 0;
+
+ pi = vsi->port_info;
+ hw_link_info = &pi->phy.link_info;
+ link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+
+ /* Changing the port's flow control is not supported if this isn't the
+ * PF VSI
+ */
+ if (vsi->type != ICE_VSI_PF) {
+ netdev_info(netdev, "Changing flow control parameters only supported for PF VSI\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (pause->autoneg != (hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
+ netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* If we have link and don't have autoneg */
+ if (!test_bit(__ICE_DOWN, pf->state) &&
+ !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
+ /* Send message that it might not necessarily work */
+ netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
+ }
+
+ if (pause->rx_pause && pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_FULL;
+ else if (pause->rx_pause && !pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_RX_PAUSE;
+ else if (!pause->rx_pause && pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_TX_PAUSE;
+ else if (!pause->rx_pause && !pause->tx_pause)
+ pi->fc.req_mode = ICE_FC_NONE;
+ else
+ return -EINVAL;
+
+ /* Set the FC mode and only restart AN if link is up */
+ status = ice_set_fc(pi, &aq_failures, link_up);
+
+ if (aq_failures & ICE_SET_FC_AQ_FAIL_GET) {
+ netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ err = -EAGAIN;
+ } else if (aq_failures & ICE_SET_FC_AQ_FAIL_SET) {
+ netdev_info(netdev, "Set fc failed on the set_phy_config call with err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ err = -EAGAIN;
+ } else if (aq_failures & ICE_SET_FC_AQ_FAIL_UPDATE) {
+ netdev_info(netdev, "Set fc failed on the get_link_info call with err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ err = -EAGAIN;
+ }
+
+ if (!test_bit(__ICE_DOWN, pf->state)) {
+ /* Give it a little more time to try to come back. If still
+ * down, restart autoneg link or reinitialize the interface.
+ */
+ msleep(75);
+ if (!test_bit(__ICE_DOWN, pf->state))
+ return ice_nway_reset(netdev);
+
+ ice_down(vsi);
+ ice_up(vsi);
+ }
+
+ return err;
+}
+
+/**
+ * ice_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the key size.
+ */
+static u32 ice_get_rxfh_key_size(struct net_device __always_unused *netdev)
+{
+ return ICE_VSIQF_HKEY_ARRAY_SIZE;
+}
+
+/**
+ * ice_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
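+ * (i.e. the number of entries in the RSS lookup table, vsi->rss_table_size).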
+ */ +static u32 ice_get_rxfh_indir_size(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + return np->vsi->rss_table_size; +} + +/** + * ice_get_rxfh - get the rx flow hash indirection table + * @netdev: network interface device structure + * @indir: indirection table + * @key: hash key + * @hfunc: hash function + * + * Reads the indirection table directly from the hardware. + */ +static int +ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + int ret = 0, i; + u8 *lut; + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + if (!indir) + return 0; + + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + /* RSS not supported return error here */ + netdev_warn(netdev, "RSS is not configured on this VSI!\n"); + return -EIO; + } + + lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) { + ret = -EIO; + goto out; + } + + for (i = 0; i < vsi->rss_table_size; i++) + indir[i] = (u32)(lut[i]); + +out: + devm_kfree(&pf->pdev->dev, lut); + return ret; +} + +/** + * ice_set_rxfh - set the rx flow hash indirection table + * @netdev: network interface device structure + * @indir: indirection table + * @key: hash key + * @hfunc: hash function + * + * Returns -EINVAL if the table specifies an invalid queue id, otherwise + * returns 0 after programming the table. + */ +static int ice_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u8 *seed = NULL; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + /* RSS not supported return error here */ + netdev_warn(netdev, "RSS is not configured on this VSI!\n"); + return -EIO; + } + + if (key) { + if (!vsi->rss_hkey_user) { + vsi->rss_hkey_user = + devm_kzalloc(&pf->pdev->dev, + ICE_VSIQF_HKEY_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_hkey_user) + return -ENOMEM; + } + memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE); + seed = vsi->rss_hkey_user; + } + + if (!vsi->rss_lut_user) { + vsi->rss_lut_user = devm_kzalloc(&pf->pdev->dev, + vsi->rss_table_size, + GFP_KERNEL); + if (!vsi->rss_lut_user) + return -ENOMEM; + } + + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + if (indir) { + int i; + + for (i = 0; i < vsi->rss_table_size; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + } else { + ice_fill_rss_lut(vsi->rss_lut_user, vsi->rss_table_size, + vsi->rss_size); + } + + if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size)) + return -EIO; + + return 0; +} + +static const struct ethtool_ops ice_ethtool_ops = { + .get_link_ksettings = ice_get_link_ksettings, + .get_drvinfo = ice_get_drvinfo, + .get_regs_len = ice_get_regs_len, + .get_regs = ice_get_regs, + .get_msglevel = ice_get_msglevel, + .set_msglevel = ice_set_msglevel, + .get_link = ethtool_op_get_link, + .get_strings = ice_get_strings, + .get_ethtool_stats = ice_get_ethtool_stats, + .get_sset_count = ice_get_sset_count, + .get_rxnfc = ice_get_rxnfc, + .get_ringparam = ice_get_ringparam, + .set_ringparam = ice_set_ringparam, + .nway_reset = ice_nway_reset, + .get_pauseparam = ice_get_pauseparam, + .set_pauseparam = ice_set_pauseparam, + .get_rxfh_key_size = 
ice_get_rxfh_key_size, + .get_rxfh_indir_size = ice_get_rxfh_indir_size, + .get_rxfh = ice_get_rxfh, + .set_rxfh = ice_set_rxfh, +}; + +/** + * ice_set_ethtool_ops - setup netdev ethtool ops + * @netdev: network interface device structure + * + * setup netdev ethtool ops with ice specific ops + */ +void ice_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &ice_ethtool_ops; +} diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h new file mode 100644 index 000000000..6076fc87d --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +/* Machine-generated file */ + +#ifndef _ICE_HW_AUTOGEN_H_ +#define _ICE_HW_AUTOGEN_H_ + +#define QTX_COMM_DBELL(_DBQM) (0x002C0000 + ((_DBQM) * 4)) +#define PF_FW_ARQBAH 0x00080180 +#define PF_FW_ARQBAL 0x00080080 +#define PF_FW_ARQH 0x00080380 +#define PF_FW_ARQH_ARQH_S 0 +#define PF_FW_ARQH_ARQH_M ICE_M(0x3FF, PF_FW_ARQH_ARQH_S) +#define PF_FW_ARQLEN 0x00080280 +#define PF_FW_ARQLEN_ARQLEN_S 0 +#define PF_FW_ARQLEN_ARQLEN_M ICE_M(0x3FF, PF_FW_ARQLEN_ARQLEN_S) +#define PF_FW_ARQLEN_ARQVFE_S 28 +#define PF_FW_ARQLEN_ARQVFE_M BIT(PF_FW_ARQLEN_ARQVFE_S) +#define PF_FW_ARQLEN_ARQOVFL_S 29 +#define PF_FW_ARQLEN_ARQOVFL_M BIT(PF_FW_ARQLEN_ARQOVFL_S) +#define PF_FW_ARQLEN_ARQCRIT_S 30 +#define PF_FW_ARQLEN_ARQCRIT_M BIT(PF_FW_ARQLEN_ARQCRIT_S) +#define PF_FW_ARQLEN_ARQENABLE_S 31 +#define PF_FW_ARQLEN_ARQENABLE_M BIT(PF_FW_ARQLEN_ARQENABLE_S) +#define PF_FW_ARQT 0x00080480 +#define PF_FW_ATQBAH 0x00080100 +#define PF_FW_ATQBAL 0x00080000 +#define PF_FW_ATQH 0x00080300 +#define PF_FW_ATQH_ATQH_S 0 +#define PF_FW_ATQH_ATQH_M ICE_M(0x3FF, PF_FW_ATQH_ATQH_S) +#define PF_FW_ATQLEN 0x00080200 +#define PF_FW_ATQLEN_ATQLEN_S 0 +#define PF_FW_ATQLEN_ATQLEN_M ICE_M(0x3FF, PF_FW_ATQLEN_ATQLEN_S) +#define PF_FW_ATQLEN_ATQVFE_S 28 +#define PF_FW_ATQLEN_ATQVFE_M BIT(PF_FW_ATQLEN_ATQVFE_S) +#define PF_FW_ATQLEN_ATQOVFL_S 29 +#define PF_FW_ATQLEN_ATQOVFL_M BIT(PF_FW_ATQLEN_ATQOVFL_S) +#define PF_FW_ATQLEN_ATQCRIT_S 30 +#define PF_FW_ATQLEN_ATQCRIT_M BIT(PF_FW_ATQLEN_ATQCRIT_S) +#define PF_FW_ATQLEN_ATQENABLE_S 31 +#define PF_FW_ATQLEN_ATQENABLE_M BIT(PF_FW_ATQLEN_ATQENABLE_S) +#define PF_FW_ATQT 0x00080400 + +#define GLFLXP_RXDID_FLAGS(_i, _j) (0x0045D000 + ((_i) * 4 + (_j) * 256)) +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S 0 +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S 8 +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S 16 +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S) +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S 24 +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S) +#define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4)) +#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S 0 +#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S) +#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S 30 +#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S) +#define GLFLXP_RXDID_FLX_WRD_1(_i) (0x0045c900 + ((_i) * 4)) +#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S 0 +#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S) +#define 
GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S 30 +#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S) +#define GLFLXP_RXDID_FLX_WRD_2(_i) (0x0045ca00 + ((_i) * 4)) +#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S 0 +#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S) +#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S 30 +#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S) +#define GLFLXP_RXDID_FLX_WRD_3(_i) (0x0045cb00 + ((_i) * 4)) +#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S 0 +#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S) +#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S 30 +#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S) + +#define QRXFLXP_CNTXT(_QRX) (0x00480000 + ((_QRX) * 4)) +#define QRXFLXP_CNTXT_RXDID_IDX_S 0 +#define QRXFLXP_CNTXT_RXDID_IDX_M ICE_M(0x3F, QRXFLXP_CNTXT_RXDID_IDX_S) +#define QRXFLXP_CNTXT_RXDID_PRIO_S 8 +#define QRXFLXP_CNTXT_RXDID_PRIO_M ICE_M(0x7, QRXFLXP_CNTXT_RXDID_PRIO_S) +#define QRXFLXP_CNTXT_TS_S 11 +#define QRXFLXP_CNTXT_TS_M BIT(QRXFLXP_CNTXT_TS_S) +#define GLGEN_RSTAT 0x000B8188 +#define GLGEN_RSTAT_DEVSTATE_S 0 +#define GLGEN_RSTAT_DEVSTATE_M ICE_M(0x3, GLGEN_RSTAT_DEVSTATE_S) +#define GLGEN_RSTCTL 0x000B8180 +#define GLGEN_RSTCTL_GRSTDEL_S 0 +#define GLGEN_RSTCTL_GRSTDEL_M ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S) +#define GLGEN_RSTAT_RESET_TYPE_S 2 +#define GLGEN_RSTAT_RESET_TYPE_M ICE_M(0x3, GLGEN_RSTAT_RESET_TYPE_S) +#define GLGEN_RTRIG 0x000B8190 +#define GLGEN_RTRIG_CORER_S 0 +#define GLGEN_RTRIG_CORER_M BIT(GLGEN_RTRIG_CORER_S) +#define GLGEN_RTRIG_GLOBR_S 1 +#define GLGEN_RTRIG_GLOBR_M BIT(GLGEN_RTRIG_GLOBR_S) +#define GLGEN_STAT 0x000B612C +#define PFGEN_CTRL 0x00091000 +#define PFGEN_CTRL_PFSWR_S 0 +#define PFGEN_CTRL_PFSWR_M BIT(PFGEN_CTRL_PFSWR_S) +#define PFGEN_STATE 0x00088000 +#define PRTGEN_STATUS 0x000B8100 +#define PFHMC_ERRORDATA 0x00520500 +#define PFHMC_ERRORINFO 0x00520400 +#define GLINT_DYN_CTL(_INT) (0x00160000 + ((_INT) * 4)) +#define GLINT_DYN_CTL_INTENA_S 0 +#define GLINT_DYN_CTL_INTENA_M BIT(GLINT_DYN_CTL_INTENA_S) +#define GLINT_DYN_CTL_CLEARPBA_S 1 +#define GLINT_DYN_CTL_CLEARPBA_M BIT(GLINT_DYN_CTL_CLEARPBA_S) +#define GLINT_DYN_CTL_SWINT_TRIG_S 2 +#define GLINT_DYN_CTL_SWINT_TRIG_M BIT(GLINT_DYN_CTL_SWINT_TRIG_S) +#define GLINT_DYN_CTL_ITR_INDX_S 3 +#define GLINT_DYN_CTL_SW_ITR_INDX_S 25 +#define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, GLINT_DYN_CTL_SW_ITR_INDX_S) +#define GLINT_DYN_CTL_INTENA_MSK_S 31 +#define GLINT_DYN_CTL_INTENA_MSK_M BIT(GLINT_DYN_CTL_INTENA_MSK_S) +#define GLINT_ITR(_i, _INT) (0x00154000 + ((_i) * 8192 + (_INT) * 4)) +#define PFINT_FW_CTL 0x0016C800 +#define PFINT_FW_CTL_MSIX_INDX_S 0 +#define PFINT_FW_CTL_MSIX_INDX_M ICE_M(0x7FF, PFINT_FW_CTL_MSIX_INDX_S) +#define PFINT_FW_CTL_ITR_INDX_S 11 +#define PFINT_FW_CTL_ITR_INDX_M ICE_M(0x3, PFINT_FW_CTL_ITR_INDX_S) +#define PFINT_FW_CTL_CAUSE_ENA_S 30 +#define PFINT_FW_CTL_CAUSE_ENA_M BIT(PFINT_FW_CTL_CAUSE_ENA_S) +#define PFINT_OICR 0x0016CA00 +#define PFINT_OICR_ECC_ERR_S 16 +#define PFINT_OICR_ECC_ERR_M BIT(PFINT_OICR_ECC_ERR_S) +#define PFINT_OICR_MAL_DETECT_S 19 +#define PFINT_OICR_MAL_DETECT_M BIT(PFINT_OICR_MAL_DETECT_S) +#define PFINT_OICR_GRST_S 20 +#define PFINT_OICR_GRST_M BIT(PFINT_OICR_GRST_S) +#define PFINT_OICR_PCI_EXCEPTION_S 21 +#define PFINT_OICR_PCI_EXCEPTION_M BIT(PFINT_OICR_PCI_EXCEPTION_S) +#define PFINT_OICR_HMC_ERR_S 26 +#define 
PFINT_OICR_HMC_ERR_M BIT(PFINT_OICR_HMC_ERR_S) +#define PFINT_OICR_PE_CRITERR_S 28 +#define PFINT_OICR_PE_CRITERR_M BIT(PFINT_OICR_PE_CRITERR_S) +#define PFINT_OICR_CTL 0x0016CA80 +#define PFINT_OICR_CTL_MSIX_INDX_S 0 +#define PFINT_OICR_CTL_MSIX_INDX_M ICE_M(0x7FF, PFINT_OICR_CTL_MSIX_INDX_S) +#define PFINT_OICR_CTL_ITR_INDX_S 11 +#define PFINT_OICR_CTL_ITR_INDX_M ICE_M(0x3, PFINT_OICR_CTL_ITR_INDX_S) +#define PFINT_OICR_CTL_CAUSE_ENA_S 30 +#define PFINT_OICR_CTL_CAUSE_ENA_M BIT(PFINT_OICR_CTL_CAUSE_ENA_S) +#define PFINT_OICR_ENA 0x0016C900 +#define QINT_RQCTL(_QRX) (0x00150000 + ((_QRX) * 4)) +#define QINT_RQCTL_MSIX_INDX_S 0 +#define QINT_RQCTL_ITR_INDX_S 11 +#define QINT_RQCTL_CAUSE_ENA_S 30 +#define QINT_RQCTL_CAUSE_ENA_M BIT(QINT_RQCTL_CAUSE_ENA_S) +#define QINT_TQCTL(_DBQM) (0x00140000 + ((_DBQM) * 4)) +#define QINT_TQCTL_MSIX_INDX_S 0 +#define QINT_TQCTL_ITR_INDX_S 11 +#define QINT_TQCTL_CAUSE_ENA_S 30 +#define QINT_TQCTL_CAUSE_ENA_M BIT(QINT_TQCTL_CAUSE_ENA_S) +#define GLLAN_RCTL_0 0x002941F8 +#define QRX_CONTEXT(_i, _QRX) (0x00280000 + ((_i) * 8192 + (_QRX) * 4)) +#define QRX_CTRL(_QRX) (0x00120000 + ((_QRX) * 4)) +#define QRX_CTRL_MAX_INDEX 2047 +#define QRX_CTRL_QENA_REQ_S 0 +#define QRX_CTRL_QENA_REQ_M BIT(QRX_CTRL_QENA_REQ_S) +#define QRX_CTRL_QENA_STAT_S 2 +#define QRX_CTRL_QENA_STAT_M BIT(QRX_CTRL_QENA_STAT_S) +#define QRX_ITR(_QRX) (0x00292000 + ((_QRX) * 4)) +#define QRX_TAIL(_QRX) (0x00290000 + ((_QRX) * 4)) +#define GLNVM_FLA 0x000B6108 +#define GLNVM_FLA_LOCKED_S 6 +#define GLNVM_FLA_LOCKED_M BIT(GLNVM_FLA_LOCKED_S) +#define GLNVM_GENS 0x000B6100 +#define GLNVM_GENS_SR_SIZE_S 5 +#define GLNVM_GENS_SR_SIZE_M ICE_M(0x7, GLNVM_GENS_SR_SIZE_S) +#define GLNVM_ULD 0x000B6008 +#define GLNVM_ULD_CORER_DONE_S 3 +#define GLNVM_ULD_CORER_DONE_M BIT(GLNVM_ULD_CORER_DONE_S) +#define GLNVM_ULD_GLOBR_DONE_S 4 +#define GLNVM_ULD_GLOBR_DONE_M BIT(GLNVM_ULD_GLOBR_DONE_S) +#define PF_FUNC_RID 0x0009E880 +#define PF_FUNC_RID_FUNC_NUM_S 0 +#define PF_FUNC_RID_FUNC_NUM_M ICE_M(0x7, PF_FUNC_RID_FUNC_NUM_S) +#define GLPRT_BPRCH(_i) (0x00381384 + ((_i) * 8)) +#define GLPRT_BPRCL(_i) (0x00381380 + ((_i) * 8)) +#define GLPRT_BPTCH(_i) (0x00381244 + ((_i) * 8)) +#define GLPRT_BPTCL(_i) (0x00381240 + ((_i) * 8)) +#define GLPRT_CRCERRS(_i) (0x00380100 + ((_i) * 8)) +#define GLPRT_GORCH(_i) (0x00380004 + ((_i) * 8)) +#define GLPRT_GORCL(_i) (0x00380000 + ((_i) * 8)) +#define GLPRT_GOTCH(_i) (0x00380B44 + ((_i) * 8)) +#define GLPRT_GOTCL(_i) (0x00380B40 + ((_i) * 8)) +#define GLPRT_ILLERRC(_i) (0x003801C0 + ((_i) * 8)) +#define GLPRT_LXOFFRXC(_i) (0x003802C0 + ((_i) * 8)) +#define GLPRT_LXOFFTXC(_i) (0x00381180 + ((_i) * 8)) +#define GLPRT_LXONRXC(_i) (0x00380280 + ((_i) * 8)) +#define GLPRT_LXONTXC(_i) (0x00381140 + ((_i) * 8)) +#define GLPRT_MLFC(_i) (0x00380040 + ((_i) * 8)) +#define GLPRT_MPRCH(_i) (0x00381344 + ((_i) * 8)) +#define GLPRT_MPRCL(_i) (0x00381340 + ((_i) * 8)) +#define GLPRT_MPTCH(_i) (0x00381204 + ((_i) * 8)) +#define GLPRT_MPTCL(_i) (0x00381200 + ((_i) * 8)) +#define GLPRT_MRFC(_i) (0x00380080 + ((_i) * 8)) +#define GLPRT_PRC1023H(_i) (0x00380A04 + ((_i) * 8)) +#define GLPRT_PRC1023L(_i) (0x00380A00 + ((_i) * 8)) +#define GLPRT_PRC127H(_i) (0x00380944 + ((_i) * 8)) +#define GLPRT_PRC127L(_i) (0x00380940 + ((_i) * 8)) +#define GLPRT_PRC1522H(_i) (0x00380A44 + ((_i) * 8)) +#define GLPRT_PRC1522L(_i) (0x00380A40 + ((_i) * 8)) +#define GLPRT_PRC255H(_i) (0x00380984 + ((_i) * 8)) +#define GLPRT_PRC255L(_i) (0x00380980 + ((_i) * 8)) +#define GLPRT_PRC511H(_i) (0x003809C4 + ((_i) * 8)) 
+#define GLPRT_PRC511L(_i) (0x003809C0 + ((_i) * 8)) +#define GLPRT_PRC64H(_i) (0x00380904 + ((_i) * 8)) +#define GLPRT_PRC64L(_i) (0x00380900 + ((_i) * 8)) +#define GLPRT_PRC9522H(_i) (0x00380A84 + ((_i) * 8)) +#define GLPRT_PRC9522L(_i) (0x00380A80 + ((_i) * 8)) +#define GLPRT_PTC1023H(_i) (0x00380C84 + ((_i) * 8)) +#define GLPRT_PTC1023L(_i) (0x00380C80 + ((_i) * 8)) +#define GLPRT_PTC127H(_i) (0x00380BC4 + ((_i) * 8)) +#define GLPRT_PTC127L(_i) (0x00380BC0 + ((_i) * 8)) +#define GLPRT_PTC1522H(_i) (0x00380CC4 + ((_i) * 8)) +#define GLPRT_PTC1522L(_i) (0x00380CC0 + ((_i) * 8)) +#define GLPRT_PTC255H(_i) (0x00380C04 + ((_i) * 8)) +#define GLPRT_PTC255L(_i) (0x00380C00 + ((_i) * 8)) +#define GLPRT_PTC511H(_i) (0x00380C44 + ((_i) * 8)) +#define GLPRT_PTC511L(_i) (0x00380C40 + ((_i) * 8)) +#define GLPRT_PTC64H(_i) (0x00380B84 + ((_i) * 8)) +#define GLPRT_PTC64L(_i) (0x00380B80 + ((_i) * 8)) +#define GLPRT_PTC9522H(_i) (0x00380D04 + ((_i) * 8)) +#define GLPRT_PTC9522L(_i) (0x00380D00 + ((_i) * 8)) +#define GLPRT_RFC(_i) (0x00380AC0 + ((_i) * 8)) +#define GLPRT_RJC(_i) (0x00380B00 + ((_i) * 8)) +#define GLPRT_RLEC(_i) (0x00380140 + ((_i) * 8)) +#define GLPRT_ROC(_i) (0x00380240 + ((_i) * 8)) +#define GLPRT_RUC(_i) (0x00380200 + ((_i) * 8)) +#define GLPRT_TDOLD(_i) (0x00381280 + ((_i) * 8)) +#define GLPRT_UPRCH(_i) (0x00381304 + ((_i) * 8)) +#define GLPRT_UPRCL(_i) (0x00381300 + ((_i) * 8)) +#define GLPRT_UPTCH(_i) (0x003811C4 + ((_i) * 8)) +#define GLPRT_UPTCL(_i) (0x003811C0 + ((_i) * 8)) +#define GLV_BPRCH(_i) (0x003B6004 + ((_i) * 8)) +#define GLV_BPRCL(_i) (0x003B6000 + ((_i) * 8)) +#define GLV_BPTCH(_i) (0x0030E004 + ((_i) * 8)) +#define GLV_BPTCL(_i) (0x0030E000 + ((_i) * 8)) +#define GLV_GORCH(_i) (0x003B0004 + ((_i) * 8)) +#define GLV_GORCL(_i) (0x003B0000 + ((_i) * 8)) +#define GLV_GOTCH(_i) (0x00300004 + ((_i) * 8)) +#define GLV_GOTCL(_i) (0x00300000 + ((_i) * 8)) +#define GLV_MPRCH(_i) (0x003B4004 + ((_i) * 8)) +#define GLV_MPRCL(_i) (0x003B4000 + ((_i) * 8)) +#define GLV_MPTCH(_i) (0x0030C004 + ((_i) * 8)) +#define GLV_MPTCL(_i) (0x0030C000 + ((_i) * 8)) +#define GLV_RDPC(_i) (0x00294C04 + ((_i) * 4)) +#define GLV_TEPC(_VSI) (0x00312000 + ((_VSI) * 4)) +#define GLV_UPRCH(_i) (0x003B2004 + ((_i) * 8)) +#define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8)) +#define GLV_UPTCH(_i) (0x0030A004 + ((_i) * 8)) +#define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8)) +#define VSIQF_HKEY_MAX_INDEX 12 + +#endif /* _ICE_HW_AUTOGEN_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h new file mode 100644 index 000000000..068dbc740 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -0,0 +1,474 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. 
*/ + +#ifndef _ICE_LAN_TX_RX_H_ +#define _ICE_LAN_TX_RX_H_ + +union ice_32byte_rx_desc { + struct { + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ + /* bit 0 of hdr_addr is DD bit */ + __le64 rsvd1; + __le64 rsvd2; + } read; + struct { + struct { + struct { + __le16 mirroring_status; + __le16 l2tag1; + } lo_dword; + union { + __le32 rss; /* RSS Hash */ + __le32 fd_id; /* Flow Director filter id */ + } hi_dword; + } qword0; + struct { + /* status/error/PTYPE/length */ + __le64 status_error_len; + } qword1; + struct { + __le16 ext_status; /* extended status */ + __le16 rsvd; + __le16 l2tag2_1; + __le16 l2tag2_2; + } qword2; + struct { + __le32 reserved; + __le32 fd_id; + } qword3; + } wb; /* writeback */ +}; + +struct ice_rx_ptype_decoded { + u32 ptype:10; + u32 known:1; + u32 outer_ip:1; + u32 outer_ip_ver:2; + u32 outer_frag:1; + u32 tunnel_type:3; + u32 tunnel_end_prot:2; + u32 tunnel_end_frag:1; + u32 inner_prot:4; + u32 payload_layer:3; +}; + +enum ice_rx_ptype_outer_ip { + ICE_RX_PTYPE_OUTER_L2 = 0, + ICE_RX_PTYPE_OUTER_IP = 1, +}; + +enum ice_rx_ptype_outer_ip_ver { + ICE_RX_PTYPE_OUTER_NONE = 0, + ICE_RX_PTYPE_OUTER_IPV4 = 1, + ICE_RX_PTYPE_OUTER_IPV6 = 2, +}; + +enum ice_rx_ptype_outer_fragmented { + ICE_RX_PTYPE_NOT_FRAG = 0, + ICE_RX_PTYPE_FRAG = 1, +}; + +enum ice_rx_ptype_tunnel_type { + ICE_RX_PTYPE_TUNNEL_NONE = 0, + ICE_RX_PTYPE_TUNNEL_IP_IP = 1, + ICE_RX_PTYPE_TUNNEL_IP_GRENAT = 2, + ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, + ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, +}; + +enum ice_rx_ptype_tunnel_end_prot { + ICE_RX_PTYPE_TUNNEL_END_NONE = 0, + ICE_RX_PTYPE_TUNNEL_END_IPV4 = 1, + ICE_RX_PTYPE_TUNNEL_END_IPV6 = 2, +}; + +enum ice_rx_ptype_inner_prot { + ICE_RX_PTYPE_INNER_PROT_NONE = 0, + ICE_RX_PTYPE_INNER_PROT_UDP = 1, + ICE_RX_PTYPE_INNER_PROT_TCP = 2, + ICE_RX_PTYPE_INNER_PROT_SCTP = 3, + ICE_RX_PTYPE_INNER_PROT_ICMP = 4, + ICE_RX_PTYPE_INNER_PROT_TIMESYNC = 5, +}; + +enum ice_rx_ptype_payload_layer { + ICE_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, + ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, + ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, + ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, +}; + +/* RX Flex Descriptor + * This descriptor is used instead of the legacy version descriptor when + * ice_rlan_ctx.adv_desc is set + */ +union ice_32b_rx_flex_desc { + struct { + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ + /* bit 0 of hdr_addr is DD bit */ + __le64 rsvd1; + __le64 rsvd2; + } read; + struct { + /* Qword 0 */ + u8 rxdid; /* descriptor builder profile id */ + u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */ + __le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */ + __le16 pkt_len; /* [15:14] are reserved */ + __le16 hdr_len_sph_flex_flags1; /* header=[10:0] */ + /* sph=[11:11] */ + /* ff1/ext=[15:12] */ + + /* Qword 1 */ + __le16 status_error0; + __le16 l2tag1; + __le16 flex_meta0; + __le16 flex_meta1; + + /* Qword 2 */ + __le16 status_error1; + u8 flex_flags2; + u8 time_stamp_low; + __le16 l2tag2_1st; + __le16 l2tag2_2nd; + + /* Qword 3 */ + __le16 flex_meta2; + __le16 flex_meta3; + union { + struct { + __le16 flex_meta4; + __le16 flex_meta5; + } flex; + __le32 ts_high; + } flex_ts; + } wb; /* writeback */ +}; + +/* Rx Flex Descriptor NIC Profile + * This descriptor corresponds to RxDID 2 which contains + * metadata fields for RSS, flow id and timestamp info + */ +struct ice_32b_rx_flex_desc_nic { + /* Qword 0 */ + u8 rxdid; + u8 mir_id_umb_cast; + __le16 ptype_flexi_flags0; + __le16 pkt_len; + __le16 
hdr_len_sph_flex_flags1; + + /* Qword 1 */ + __le16 status_error0; + __le16 l2tag1; + __le32 rss_hash; + + /* Qword 2 */ + __le16 status_error1; + u8 flexi_flags2; + u8 ts_low; + __le16 l2tag2_1st; + __le16 l2tag2_2nd; + + /* Qword 3 */ + __le32 flow_id; + union { + struct { + __le16 vlan_id; + __le16 flow_id_ipv6; + } flex; + __le32 ts_high; + } flex_ts; +}; + +/* Receive Flex Descriptor profile IDs: There are a total + * of 64 profiles where profile IDs 0/1 are for legacy; and + * profiles 2-63 are flex profiles that can be programmed + * with a specific metadata (profile 7 reserved for HW) + */ +enum ice_rxdid { + ICE_RXDID_START = 0, + ICE_RXDID_LEGACY_0 = ICE_RXDID_START, + ICE_RXDID_LEGACY_1, + ICE_RXDID_FLX_START, + ICE_RXDID_FLEX_NIC = ICE_RXDID_FLX_START, + ICE_RXDID_FLX_LAST = 63, + ICE_RXDID_LAST = ICE_RXDID_FLX_LAST +}; + +/* Receive Flex Descriptor Rx opcode values */ +#define ICE_RX_OPC_MDID 0x01 + +/* Receive Descriptor MDID values */ +#define ICE_RX_MDID_FLOW_ID_LOWER 5 +#define ICE_RX_MDID_FLOW_ID_HIGH 6 +#define ICE_RX_MDID_HASH_LOW 56 +#define ICE_RX_MDID_HASH_HIGH 57 + +/* Rx Flag64 packet flag bits */ +enum ice_rx_flg64_bits { + ICE_RXFLG_PKT_DSI = 0, + ICE_RXFLG_EVLAN_x8100 = 15, + ICE_RXFLG_EVLAN_x9100, + ICE_RXFLG_VLAN_x8100, + ICE_RXFLG_TNL_MAC = 22, + ICE_RXFLG_TNL_VLAN, + ICE_RXFLG_PKT_FRG, + ICE_RXFLG_FIN = 32, + ICE_RXFLG_SYN, + ICE_RXFLG_RST, + ICE_RXFLG_TNL0 = 38, + ICE_RXFLG_TNL1, + ICE_RXFLG_TNL2, + ICE_RXFLG_UDP_GRE, + ICE_RXFLG_RSVD = 63 +}; + +/* for ice_32byte_rx_flex_desc.ptype_flexi_flags0 member */ +#define ICE_RX_FLEX_DESC_PTYPE_M (0x3FF) /* 10-bits */ + +/* for ice_32byte_rx_flex_desc.pkt_length member */ +#define ICE_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */ + +enum ice_rx_flex_desc_status_error_0_bits { + /* Note: These are predefined bit offsets */ + ICE_RX_FLEX_DESC_STATUS0_DD_S = 0, + ICE_RX_FLEX_DESC_STATUS0_EOF_S, + ICE_RX_FLEX_DESC_STATUS0_HBO_S, + ICE_RX_FLEX_DESC_STATUS0_L3L4P_S, + ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S, + ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S, + ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S, + ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S, + ICE_RX_FLEX_DESC_STATUS0_LPBK_S, + ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S, + ICE_RX_FLEX_DESC_STATUS0_RXE_S, + ICE_RX_FLEX_DESC_STATUS0_CRCP_S, + ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S, + ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S, + ICE_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S, + ICE_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S, + ICE_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */ +}; + +#define ICE_RXQ_CTX_SIZE_DWORDS 8 +#define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32)) + +/* RLAN Rx queue context data + * + * The sizes of the variables may be larger than needed due to crossing byte + * boundaries. If we do not have the width of the variable set to the correct + * size then we could end up shifting bits off the top of the variable when the + * variable is at the top of a byte and crosses over into the next byte. 
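+ *
+ * The packed hardware image of this context occupies ICE_RXQ_CTX_SIZE_DWORDS
+ * 32-bit words (ICE_RXQ_CTX_SZ bytes); see the defines above.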
+ */ +struct ice_rlan_ctx { + u16 head; + u16 cpuid; /* bigger than needed, see above for reason */ +#define ICE_RLAN_BASE_S 7 + u64 base; + u16 qlen; +#define ICE_RLAN_CTX_DBUF_S 7 + u16 dbuf; /* bigger than needed, see above for reason */ +#define ICE_RLAN_CTX_HBUF_S 6 + u16 hbuf; /* bigger than needed, see above for reason */ + u8 dtype; + u8 dsize; + u8 crcstrip; + u8 l2tsel; + u8 hsplit_0; + u8 hsplit_1; + u8 showiv; + u32 rxmax; /* bigger than needed, see above for reason */ + u8 tphrdesc_ena; + u8 tphwdesc_ena; + u8 tphdata_ena; + u8 tphhead_ena; + u16 lrxqthresh; /* bigger than needed, see above for reason */ +}; + +struct ice_ctx_ele { + u16 offset; + u16 size_of; + u16 width; + u16 lsb; +}; + +#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \ + .offset = offsetof(struct _struct, _ele), \ + .size_of = FIELD_SIZEOF(struct _struct, _ele), \ + .width = _width, \ + .lsb = _lsb, \ +} + +/* for hsplit_0 field of Rx RLAN context */ +enum ice_rlan_ctx_rx_hsplit_0 { + ICE_RLAN_RX_HSPLIT_0_NO_SPLIT = 0, + ICE_RLAN_RX_HSPLIT_0_SPLIT_L2 = 1, + ICE_RLAN_RX_HSPLIT_0_SPLIT_IP = 2, + ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP = 4, + ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP = 8, +}; + +/* for hsplit_1 field of Rx RLAN context */ +enum ice_rlan_ctx_rx_hsplit_1 { + ICE_RLAN_RX_HSPLIT_1_NO_SPLIT = 0, + ICE_RLAN_RX_HSPLIT_1_SPLIT_L2 = 1, + ICE_RLAN_RX_HSPLIT_1_SPLIT_ALWAYS = 2, +}; + +/* TX Descriptor */ +struct ice_tx_desc { + __le64 buf_addr; /* Address of descriptor's data buf */ + __le64 cmd_type_offset_bsz; +}; + +enum ice_tx_desc_dtype_value { + ICE_TX_DESC_DTYPE_DATA = 0x0, + ICE_TX_DESC_DTYPE_CTX = 0x1, + /* DESC_DONE - HW has completed write-back of descriptor */ + ICE_TX_DESC_DTYPE_DESC_DONE = 0xF, +}; + +#define ICE_TXD_QW1_CMD_S 4 +#define ICE_TXD_QW1_CMD_M (0xFFFUL << ICE_TXD_QW1_CMD_S) + +enum ice_tx_desc_cmd_bits { + ICE_TX_DESC_CMD_EOP = 0x0001, + ICE_TX_DESC_CMD_RS = 0x0002, + ICE_TX_DESC_CMD_IL2TAG1 = 0x0008, + ICE_TX_DESC_CMD_IIPT_IPV6 = 0x0020, /* 2 BITS */ + ICE_TX_DESC_CMD_IIPT_IPV4 = 0x0040, /* 2 BITS */ + ICE_TX_DESC_CMD_IIPT_IPV4_CSUM = 0x0060, /* 2 BITS */ + ICE_TX_DESC_CMD_L4T_EOFT_TCP = 0x0100, /* 2 BITS */ + ICE_TX_DESC_CMD_L4T_EOFT_UDP = 0x0300, /* 2 BITS */ +}; + +#define ICE_TXD_QW1_OFFSET_S 16 +#define ICE_TXD_QW1_OFFSET_M (0x3FFFFULL << ICE_TXD_QW1_OFFSET_S) + +enum ice_tx_desc_len_fields { + /* Note: These are predefined bit offsets */ + ICE_TX_DESC_LEN_MACLEN_S = 0, /* 7 BITS */ + ICE_TX_DESC_LEN_IPLEN_S = 7, /* 7 BITS */ + ICE_TX_DESC_LEN_L4_LEN_S = 14 /* 4 BITS */ +}; + +#define ICE_TXD_QW1_MACLEN_M (0x7FUL << ICE_TX_DESC_LEN_MACLEN_S) +#define ICE_TXD_QW1_IPLEN_M (0x7FUL << ICE_TX_DESC_LEN_IPLEN_S) +#define ICE_TXD_QW1_L4LEN_M (0xFUL << ICE_TX_DESC_LEN_L4_LEN_S) + +/* Tx descriptor field limits in bytes */ +#define ICE_TXD_MACLEN_MAX ((ICE_TXD_QW1_MACLEN_M >> \ + ICE_TX_DESC_LEN_MACLEN_S) * ICE_BYTES_PER_WORD) +#define ICE_TXD_IPLEN_MAX ((ICE_TXD_QW1_IPLEN_M >> \ + ICE_TX_DESC_LEN_IPLEN_S) * ICE_BYTES_PER_DWORD) +#define ICE_TXD_L4LEN_MAX ((ICE_TXD_QW1_L4LEN_M >> \ + ICE_TX_DESC_LEN_L4_LEN_S) * ICE_BYTES_PER_DWORD) + +#define ICE_TXD_QW1_TX_BUF_SZ_S 34 +#define ICE_TXD_QW1_L2TAG1_S 48 + +/* Context descriptors */ +struct ice_tx_ctx_desc { + __le32 tunneling_params; + __le16 l2tag2; + __le16 rsvd; + __le64 qw1; +}; + +#define ICE_TXD_CTX_QW1_CMD_S 4 +#define ICE_TXD_CTX_QW1_CMD_M (0x7FUL << ICE_TXD_CTX_QW1_CMD_S) + +#define ICE_TXD_CTX_QW1_TSO_LEN_S 30 +#define ICE_TXD_CTX_QW1_TSO_LEN_M \ + (0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S) + +#define ICE_TXD_CTX_QW1_MSS_S 50 + +enum 
ice_tx_ctx_desc_cmd_bits { + ICE_TX_CTX_DESC_TSO = 0x01, + ICE_TX_CTX_DESC_TSYN = 0x02, + ICE_TX_CTX_DESC_IL2TAG2 = 0x04, + ICE_TX_CTX_DESC_IL2TAG2_IL2H = 0x08, + ICE_TX_CTX_DESC_SWTCH_NOTAG = 0x00, + ICE_TX_CTX_DESC_SWTCH_UPLINK = 0x10, + ICE_TX_CTX_DESC_SWTCH_LOCAL = 0x20, + ICE_TX_CTX_DESC_SWTCH_VSI = 0x30, + ICE_TX_CTX_DESC_RESERVED = 0x40 +}; + +#define ICE_LAN_TXQ_MAX_QGRPS 127 +#define ICE_LAN_TXQ_MAX_QDIS 1023 + +/* Tx queue context data + * + * The sizes of the variables may be larger than needed due to crossing byte + * boundaries. If we do not have the width of the variable set to the correct + * size then we could end up shifting bits off the top of the variable when the + * variable is at the top of a byte and crosses over into the next byte. + */ +struct ice_tlan_ctx { +#define ICE_TLAN_CTX_BASE_S 7 + u64 base; /* base is defined in 128-byte units */ + u8 port_num; + u16 cgd_num; /* bigger than needed, see above for reason */ + u8 pf_num; + u16 vmvf_num; + u8 vmvf_type; +#define ICE_TLAN_CTX_VMVF_TYPE_VMQ 1 +#define ICE_TLAN_CTX_VMVF_TYPE_PF 2 + u16 src_vsi; + u8 tsyn_ena; + u8 alt_vlan; + u16 cpuid; /* bigger than needed, see above for reason */ + u8 wb_mode; + u8 tphrd_desc; + u8 tphrd; + u8 tphwr_desc; + u16 cmpq_id; + u16 qnum_in_func; + u8 itr_notification_mode; + u8 adjust_prof_id; + u32 qlen; /* bigger than needed, see above for reason */ + u8 quanta_prof_idx; + u8 tso_ena; + u16 tso_qnum; + u8 legacy_int; + u8 drop_ena; + u8 cache_prof_idx; + u8 pkt_shaper_prof_idx; + u8 int_q_state; /* width not needed - internal do not write */ +}; + +/* macro to make the table lines short */ +#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ + { PTYPE, \ + 1, \ + ICE_RX_PTYPE_OUTER_##OUTER_IP, \ + ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \ + ICE_RX_PTYPE_##OUTER_FRAG, \ + ICE_RX_PTYPE_TUNNEL_##T, \ + ICE_RX_PTYPE_TUNNEL_END_##TE, \ + ICE_RX_PTYPE_##TEF, \ + ICE_RX_PTYPE_INNER_PROT_##I, \ + ICE_RX_PTYPE_PAYLOAD_LAYER_##PL } + +#define ICE_PTT_UNUSED_ENTRY(PTYPE) { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + +/* shorter macros makes the table fit but are terse */ +#define ICE_RX_PTYPE_NOF ICE_RX_PTYPE_NOT_FRAG + +/* Lookup table mapping the HW PTYPE to the bit field for decoding */ +static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = { + /* L2 Packet types */ + ICE_PTT_UNUSED_ENTRY(0), + ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), + ICE_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), +}; + +static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) +{ + return ice_ptype_lkup[ptype]; +} +#endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c new file mode 100644 index 000000000..e513c46bd --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -0,0 +1,5519 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. 
*/ + +/* Intel(R) Ethernet Connection E800 Series Linux Driver */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "ice.h" + +#define DRV_VERSION "0.7.1-k" +#define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" +const char ice_drv_ver[] = DRV_VERSION; +static const char ice_driver_string[] = DRV_SUMMARY; +static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; + +MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); +MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +static int debug = -1; +module_param(debug, int, 0644); +#ifndef CONFIG_DYNAMIC_DEBUG +MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)"); +#else +MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); +#endif /* !CONFIG_DYNAMIC_DEBUG */ + +static struct workqueue_struct *ice_wq; +static const struct net_device_ops ice_netdev_ops; + +static void ice_pf_dis_all_vsi(struct ice_pf *pf); +static void ice_rebuild(struct ice_pf *pf); +static int ice_vsi_release(struct ice_vsi *vsi); +static void ice_update_vsi_stats(struct ice_vsi *vsi); +static void ice_update_pf_stats(struct ice_pf *pf); + +/** + * ice_get_free_slot - get the next non-NULL location index in array + * @array: array to search + * @size: size of the array + * @curr: last known occupied index to be used as a search hint + * + * void * is being used to keep the functionality generic. This lets us use this + * function on any array of pointers. + */ +static int ice_get_free_slot(void *array, int size, int curr) +{ + int **tmp_array = (int **)array; + int next; + + if (curr < (size - 1) && !tmp_array[curr + 1]) { + next = curr + 1; + } else { + int i = 0; + + while ((i < size) && (tmp_array[i])) + i++; + if (i == size) + next = ICE_NO_VSI; + else + next = i; + } + return next; +} + +/** + * ice_search_res - Search the tracker for a block of resources + * @res: pointer to the resource + * @needed: size of the block needed + * @id: identifier to track owner + * Returns the base item index of the block, or -ENOMEM for error + */ +static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) +{ + int start = res->search_hint; + int end = start; + + id |= ICE_RES_VALID_BIT; + + do { + /* skip already allocated entries */ + if (res->list[end++] & ICE_RES_VALID_BIT) { + start = end; + if ((start + needed) > res->num_entries) + break; + } + + if (end == (start + needed)) { + int i = start; + + /* there was enough, so assign it to the requestor */ + while (i != end) + res->list[i++] = id; + + if (end == res->num_entries) + end = 0; + + res->search_hint = end; + return start; + } + } while (1); + + return -ENOMEM; +} + +/** + * ice_get_res - get a block of resources + * @pf: board private structure + * @res: pointer to the resource + * @needed: size of the block needed + * @id: identifier to track owner + * + * Returns the base item index of the block, or -ENOMEM for error + * The search_hint trick and lack of advanced fit-finding only works + * because we're highly likely to have all the same sized requests. + * Linear search time and any fragmentation should be minimal. 
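+ *
+ * If the search starting at res->search_hint fails, the hint is reset to
+ * zero and the search is retried once from the beginning of the tracker.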
+ */ +static int +ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) +{ + int ret; + + if (!res || !pf) + return -EINVAL; + + if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) { + dev_err(&pf->pdev->dev, + "param err: needed=%d, num_entries = %d id=0x%04x\n", + needed, res->num_entries, id); + return -EINVAL; + } + + /* search based on search_hint */ + ret = ice_search_res(res, needed, id); + + if (ret < 0) { + /* previous search failed. Reset search hint and try again */ + res->search_hint = 0; + ret = ice_search_res(res, needed, id); + } + + return ret; +} + +/** + * ice_free_res - free a block of resources + * @res: pointer to the resource + * @index: starting index previously returned by ice_get_res + * @id: identifier to track owner + * Returns number of resources freed + */ +static int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) +{ + int count = 0; + int i; + + if (!res || index >= res->num_entries) + return -EINVAL; + + id |= ICE_RES_VALID_BIT; + for (i = index; i < res->num_entries && res->list[i] == id; i++) { + res->list[i] = 0; + count++; + } + + return count; +} + +/** + * ice_add_mac_to_list - Add a mac address filter entry to the list + * @vsi: the VSI to be forwarded to + * @add_list: pointer to the list which contains MAC filter entries + * @macaddr: the MAC address to be added. + * + * Adds mac address filter entry to the temp list + * + * Returns 0 on success or ENOMEM on failure. + */ +static int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, + const u8 *macaddr) +{ + struct ice_fltr_list_entry *tmp; + struct ice_pf *pf = vsi->back; + + tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + + tmp->fltr_info.flag = ICE_FLTR_TX; + tmp->fltr_info.src = vsi->vsi_num; + tmp->fltr_info.lkup_type = ICE_SW_LKUP_MAC; + tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; + tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num; + ether_addr_copy(tmp->fltr_info.l_data.mac.mac_addr, macaddr); + + INIT_LIST_HEAD(&tmp->list_entry); + list_add(&tmp->list_entry, add_list); + + return 0; +} + +/** + * ice_add_mac_to_sync_list - creates list of mac addresses to be synced + * @netdev: the net device on which the sync is happening + * @addr: mac address to sync + * + * This is a callback function which is called by the in kernel device sync + * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only + * populates the tmp_sync_list, which is later used by ice_add_mac to add the + * mac filters from the hardware. + */ +static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + if (ice_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr)) + return -EINVAL; + + return 0; +} + +/** + * ice_add_mac_to_unsync_list - creates list of mac addresses to be unsynced + * @netdev: the net device on which the unsync is happening + * @addr: mac address to unsync + * + * This is a callback function which is called by the in kernel device unsync + * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only + * populates the tmp_unsync_list, which is later used by ice_remove_mac to + * delete the mac filters from the hardware. 
+ */ +static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + if (ice_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr)) + return -EINVAL; + + return 0; +} + +/** + * ice_free_fltr_list - free filter lists helper + * @dev: pointer to the device struct + * @h: pointer to the list head to be freed + * + * Helper function to free filter lists previously created using + * ice_add_mac_to_list + */ +static void ice_free_fltr_list(struct device *dev, struct list_head *h) +{ + struct ice_fltr_list_entry *e, *tmp; + + list_for_each_entry_safe(e, tmp, h, list_entry) { + list_del(&e->list_entry); + devm_kfree(dev, e); + } +} + +/** + * ice_vsi_fltr_changed - check if filter state changed + * @vsi: VSI to be checked + * + * returns true if filter state has changed, false otherwise. + */ +static bool ice_vsi_fltr_changed(struct ice_vsi *vsi) +{ + return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) || + test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) || + test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); +} + +/** + * ice_vsi_sync_fltr - Update the VSI filter list to the HW + * @vsi: ptr to the VSI + * + * Push any outstanding VSI filter changes through the AdminQ. + */ +static int ice_vsi_sync_fltr(struct ice_vsi *vsi) +{ + struct device *dev = &vsi->back->pdev->dev; + struct net_device *netdev = vsi->netdev; + bool promisc_forced_on = false; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status = 0; + u32 changed_flags = 0; + int err = 0; + + if (!vsi->netdev) + return -EINVAL; + + while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) + usleep_range(1000, 2000); + + changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; + vsi->current_netdev_flags = vsi->netdev->flags; + + INIT_LIST_HEAD(&vsi->tmp_sync_list); + INIT_LIST_HEAD(&vsi->tmp_unsync_list); + + if (ice_vsi_fltr_changed(vsi)) { + clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); + clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + + /* grab the netdev's addr_list_lock */ + netif_addr_lock_bh(netdev); + __dev_uc_sync(netdev, ice_add_mac_to_sync_list, + ice_add_mac_to_unsync_list); + __dev_mc_sync(netdev, ice_add_mac_to_sync_list, + ice_add_mac_to_unsync_list); + /* our temp lists are populated. release lock */ + netif_addr_unlock_bh(netdev); + } + + /* Remove mac addresses in the unsync list */ + status = ice_remove_mac(hw, &vsi->tmp_unsync_list); + ice_free_fltr_list(dev, &vsi->tmp_unsync_list); + if (status) { + netdev_err(netdev, "Failed to delete MAC filters\n"); + /* if we failed because of alloc failures, just bail */ + if (status == ICE_ERR_NO_MEMORY) { + err = -ENOMEM; + goto out; + } + } + + /* Add mac addresses in the sync list */ + status = ice_add_mac(hw, &vsi->tmp_sync_list); + ice_free_fltr_list(dev, &vsi->tmp_sync_list); + if (status) { + netdev_err(netdev, "Failed to add MAC filters\n"); + /* If there is no more space for new umac filters, vsi + * should go into promiscuous mode. There should be some + * space reserved for promiscuous filters. 
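+ * An ICE_AQ_RC_ENOSPC status from the AdminQ is treated as this
+ * out-of-filter-space condition.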
+ */
+ if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
+ !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC,
+ vsi->state)) {
+ promisc_forced_on = true;
+ netdev_warn(netdev,
+ "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
+ vsi->vsi_num);
+ } else {
+ err = -EIO;
+ goto out;
+ }
+ }
+ /* check for changes in promiscuous modes */
+ if (changed_flags & IFF_ALLMULTI)
+ netdev_warn(netdev, "Unsupported configuration\n");
+
+ if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
+ test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) {
+ clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
+ if (vsi->current_netdev_flags & IFF_PROMISC) {
+ /* Apply TX filter rule to get traffic from VMs */
+ status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true,
+ ICE_FLTR_TX);
+ if (status) {
+ netdev_err(netdev, "Error setting default VSI %i tx rule\n",
+ vsi->vsi_num);
+ vsi->current_netdev_flags &= ~IFF_PROMISC;
+ err = -EIO;
+ goto out_promisc;
+ }
+ /* Apply RX filter rule to get traffic from wire */
+ status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true,
+ ICE_FLTR_RX);
+ if (status) {
+ netdev_err(netdev, "Error setting default VSI %i rx rule\n",
+ vsi->vsi_num);
+ vsi->current_netdev_flags &= ~IFF_PROMISC;
+ err = -EIO;
+ goto out_promisc;
+ }
+ } else {
+ /* Clear TX filter rule to stop traffic from VMs */
+ status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false,
+ ICE_FLTR_TX);
+ if (status) {
+ netdev_err(netdev, "Error clearing default VSI %i tx rule\n",
+ vsi->vsi_num);
+ vsi->current_netdev_flags |= IFF_PROMISC;
+ err = -EIO;
+ goto out_promisc;
+ }
+ /* Clear filter RX to remove traffic from wire */
+ status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false,
+ ICE_FLTR_RX);
+ if (status) {
+ netdev_err(netdev, "Error clearing default VSI %i rx rule\n",
+ vsi->vsi_num);
+ vsi->current_netdev_flags |= IFF_PROMISC;
+ err = -EIO;
+ goto out_promisc;
+ }
+ }
+ }
+ goto exit;
+
+out_promisc:
+ set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
+ goto exit;
+out:
+ /* if something went wrong then set the changed flag so we try again */
+ set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
+ set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+exit:
+ clear_bit(__ICE_CFG_BUSY, vsi->state);
+ return err;
+}
+
+/**
+ * ice_sync_fltr_subtask - Sync the VSI filter list with HW
+ * @pf: board private structure
+ */
+static void ice_sync_fltr_subtask(struct ice_pf *pf)
+{
+ int v;
+
+ if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
+ return;
+
+ clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+
+ for (v = 0; v < pf->num_alloc_vsi; v++)
+ if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
+ ice_vsi_sync_fltr(pf->vsi[v])) {
+ /* come back and try again later */
+ set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+ break;
+ }
+}
+
+/**
+ * ice_is_reset_recovery_pending - check if a reset recovery is pending
+ * @state: pf state field
+ */
+static bool ice_is_reset_recovery_pending(unsigned long int *state)
+{
+ return test_bit(__ICE_RESET_RECOVERY_PENDING, state);
+}
+
+/**
+ * ice_prepare_for_reset - prep for the core to reset
+ * @pf: board private structure
+ *
+ * Inform or close all dependent features in prep for reset.
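+ * This removes the VSI filters, disables the VSIs and their queues, and
+ * shuts down the control queues.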
+ */
+static void
+ice_prepare_for_reset(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ u32 v;
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v])
+ ice_remove_vsi_fltr(hw, pf->vsi[v]->vsi_num);
+
+ dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n");
+
+ /* disable the VSIs and their queues that are not already DOWN */
+ /* pf_dis_all_vsi modifies netdev structures - rtnl_lock needed */
+ ice_pf_dis_all_vsi(pf);
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v])
+ pf->vsi[v]->vsi_num = 0;
+
+ ice_shutdown_all_ctrlq(hw);
+}
+
+/**
+ * ice_do_reset - Initiate one of many types of resets
+ * @pf: board private structure
+ * @reset_type: reset type requested before this function was called
+ */
+static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+ struct device *dev = &pf->pdev->dev;
+ struct ice_hw *hw = &pf->hw;
+
+ dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
+ WARN_ON(in_interrupt());
+
+ /* PFR is a bit of a special case because it doesn't result in an OICR
+ * interrupt. So for PFR, we prepare for reset, issue the reset and
+ * rebuild sequentially.
+ */
+ if (reset_type == ICE_RESET_PFR) {
+ set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+ ice_prepare_for_reset(pf);
+ }
+
+ /* trigger the reset */
+ if (ice_reset(hw, reset_type)) {
+ dev_err(dev, "reset %d failed\n", reset_type);
+ set_bit(__ICE_RESET_FAILED, pf->state);
+ clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+ return;
+ }
+
+ if (reset_type == ICE_RESET_PFR) {
+ pf->pfr_count++;
+ ice_rebuild(pf);
+ clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+ }
+}
+
+/**
+ * ice_reset_subtask - Set up for resetting the device and driver
+ * @pf: board private structure
+ */
+static void ice_reset_subtask(struct ice_pf *pf)
+{
+ enum ice_reset_req reset_type;
+
+ rtnl_lock();
+
+ /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
+ * OICR interrupt. The OICR handler (ice_misc_intr) determines what
+ * type of reset happened and sets __ICE_RESET_RECOVERY_PENDING bit in
+ * pf->state. So if reset/recovery is pending (as indicated by this bit)
+ * we do a rebuild and return.
+ */
+ if (ice_is_reset_recovery_pending(pf->state)) {
+ clear_bit(__ICE_GLOBR_RECV, pf->state);
+ clear_bit(__ICE_CORER_RECV, pf->state);
+ ice_prepare_for_reset(pf);
+
+ /* make sure we are ready to rebuild */
+ if (ice_check_reset(&pf->hw)) {
+ set_bit(__ICE_RESET_FAILED, pf->state);
+ } else {
+ /* done with reset. start rebuild */
+ pf->hw.reset_ongoing = false;
+ ice_rebuild(pf);
+ }
+ clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+ goto unlock;
+ }
+
+ /* No pending resets to finish processing.
Check for new resets */ + if (test_and_clear_bit(__ICE_GLOBR_REQ, pf->state)) + reset_type = ICE_RESET_GLOBR; + else if (test_and_clear_bit(__ICE_CORER_REQ, pf->state)) + reset_type = ICE_RESET_CORER; + else if (test_and_clear_bit(__ICE_PFR_REQ, pf->state)) + reset_type = ICE_RESET_PFR; + else + goto unlock; + + /* reset if not already down or resetting */ + if (!test_bit(__ICE_DOWN, pf->state) && + !test_bit(__ICE_CFG_BUSY, pf->state)) { + ice_do_reset(pf, reset_type); + } + +unlock: + rtnl_unlock(); +} + +/** + * ice_watchdog_subtask - periodic tasks not using event driven scheduling + * @pf: board private structure + */ +static void ice_watchdog_subtask(struct ice_pf *pf) +{ + int i; + + /* if interface is down do nothing */ + if (test_bit(__ICE_DOWN, pf->state) || + test_bit(__ICE_CFG_BUSY, pf->state)) + return; + + /* make sure we don't do these things too often */ + if (time_before(jiffies, + pf->serv_tmr_prev + pf->serv_tmr_period)) + return; + + pf->serv_tmr_prev = jiffies; + + /* Update the stats for active netdevs so the network stack + * can look at updated numbers whenever it cares to + */ + ice_update_pf_stats(pf); + for (i = 0; i < pf->num_alloc_vsi; i++) + if (pf->vsi[i] && pf->vsi[i]->netdev) + ice_update_vsi_stats(pf->vsi[i]); +} + +/** + * ice_print_link_msg - print link up or down message + * @vsi: the VSI whose link status is being queried + * @isup: boolean for if the link is now up or down + */ +void ice_print_link_msg(struct ice_vsi *vsi, bool isup) +{ + const char *speed; + const char *fc; + + if (vsi->current_isup == isup) + return; + + vsi->current_isup = isup; + + if (!isup) { + netdev_info(vsi->netdev, "NIC Link is Down\n"); + return; + } + + switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_40GB: + speed = "40 G"; + break; + case ICE_AQ_LINK_SPEED_25GB: + speed = "25 G"; + break; + case ICE_AQ_LINK_SPEED_20GB: + speed = "20 G"; + break; + case ICE_AQ_LINK_SPEED_10GB: + speed = "10 G"; + break; + case ICE_AQ_LINK_SPEED_5GB: + speed = "5 G"; + break; + case ICE_AQ_LINK_SPEED_2500MB: + speed = "2.5 G"; + break; + case ICE_AQ_LINK_SPEED_1000MB: + speed = "1 G"; + break; + case ICE_AQ_LINK_SPEED_100MB: + speed = "100 M"; + break; + default: + speed = "Unknown"; + break; + } + + switch (vsi->port_info->fc.current_mode) { + case ICE_FC_FULL: + fc = "RX/TX"; + break; + case ICE_FC_TX_PAUSE: + fc = "TX"; + break; + case ICE_FC_RX_PAUSE: + fc = "RX"; + break; + case ICE_FC_NONE: + fc = "None"; + break; + default: + fc = "Unknown"; + break; + } + + netdev_info(vsi->netdev, "NIC Link is up %sbps, Flow Control: %s\n", + speed, fc); +} + +/** + * ice_init_link_events - enable/initialize link events + * @pi: pointer to the port_info instance + * + * Returns -EIO on failure, 0 on success + */ +static int ice_init_link_events(struct ice_port_info *pi) +{ + u16 mask; + + mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA | + ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL)); + + if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) { + dev_dbg(ice_hw_to_dev(pi->hw), + "Failed to set link event mask for port %d\n", + pi->lport); + return -EIO; + } + + if (ice_aq_get_link_info(pi, true, NULL, NULL)) { + dev_dbg(ice_hw_to_dev(pi->hw), + "Failed to enable link events for port %d\n", + pi->lport); + return -EIO; + } + + return 0; +} + +/** + * ice_vsi_link_event - update the vsi's netdev + * @vsi: the vsi on which the link event occurred + * @link_up: whether or not the vsi needs to be set up or down + */ +static void ice_vsi_link_event(struct 
ice_vsi *vsi, bool link_up)
+{
+ if (!vsi || test_bit(__ICE_DOWN, vsi->state))
+ return;
+
+ if (vsi->type == ICE_VSI_PF) {
+ if (!vsi->netdev) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "vsi->netdev is not initialized!\n");
+ return;
+ }
+ if (link_up) {
+ netif_carrier_on(vsi->netdev);
+ netif_tx_wake_all_queues(vsi->netdev);
+ } else {
+ netif_carrier_off(vsi->netdev);
+ netif_tx_stop_all_queues(vsi->netdev);
+ }
+ }
+}
+
+/**
+ * ice_link_event - process the link event
+ * @pf: pf that the link event is associated with
+ * @pi: port_info for the port that the link event is associated with
+ *
+ * Returns -EIO if ice_get_link_status() fails
+ * Returns 0 on success
+ */
+static int
+ice_link_event(struct ice_pf *pf, struct ice_port_info *pi)
+{
+ u8 new_link_speed, old_link_speed;
+ struct ice_phy_info *phy_info;
+ bool new_link_same_as_old;
+ bool new_link, old_link;
+ u8 lport;
+ u16 v;
+
+ phy_info = &pi->phy;
+ phy_info->link_info_old = phy_info->link_info;
+ /* Force ice_get_link_status() to update link info */
+ phy_info->get_link_info = true;
+
+ old_link = (phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
+ old_link_speed = phy_info->link_info_old.link_speed;
+
+ lport = pi->lport;
+ if (ice_get_link_status(pi, &new_link)) {
+ dev_dbg(&pf->pdev->dev,
+ "Could not get link status for port %d\n", lport);
+ return -EIO;
+ }
+
+ new_link_speed = phy_info->link_info.link_speed;
+
+ new_link_same_as_old = (new_link == old_link &&
+ new_link_speed == old_link_speed);
+
+ ice_for_each_vsi(pf, v) {
+ struct ice_vsi *vsi = pf->vsi[v];
+
+ if (!vsi || !vsi->port_info)
+ continue;
+
+ if (new_link_same_as_old &&
+ (test_bit(__ICE_DOWN, vsi->state) ||
+ new_link == netif_carrier_ok(vsi->netdev)))
+ continue;
+
+ if (vsi->port_info->lport == lport) {
+ ice_print_link_msg(vsi, new_link);
+ ice_vsi_link_event(vsi, new_link);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_handle_link_event - handle link event via ARQ
+ * @pf: pf that the link event is associated with
+ *
+ * Return -EINVAL if port_info is null
+ * Return status on success
+ */
+static int ice_handle_link_event(struct ice_pf *pf)
+{
+ struct ice_port_info *port_info;
+ int status;
+
+ port_info = pf->hw.port_info;
+ if (!port_info)
+ return -EINVAL;
+
+ status = ice_link_event(pf, port_info);
+ if (status)
+ dev_dbg(&pf->pdev->dev,
+ "Could not process link event, error %d\n", status);
+
+ return status;
+}
+
+/**
+ * __ice_clean_ctrlq - helper function to clean controlq rings
+ * @pf: ptr to struct ice_pf
+ * @q_type: specific Control queue type
+ */
+static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
+{
+ struct ice_rq_event_info event;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_ctl_q_info *cq;
+ u16 pending, i = 0;
+ const char *qtype;
+ u32 oldval, val;
+
+ /* Do not clean control queue if/when PF reset fails */
+ if (test_bit(__ICE_RESET_FAILED, pf->state))
+ return 0;
+
+ switch (q_type) {
+ case ICE_CTL_Q_ADMIN:
+ cq = &hw->adminq;
+ qtype = "Admin";
+ break;
+ default:
+ dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n",
+ q_type);
+ return 0;
+ }
+
+ /* check for error indications - PF_xx_AxQLEN register layout for
+ * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
+ */ + val = rd32(hw, cq->rq.len); + if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M | + PF_FW_ARQLEN_ARQCRIT_M)) { + oldval = val; + if (val & PF_FW_ARQLEN_ARQVFE_M) + dev_dbg(&pf->pdev->dev, + "%s Receive Queue VF Error detected\n", qtype); + if (val & PF_FW_ARQLEN_ARQOVFL_M) { + dev_dbg(&pf->pdev->dev, + "%s Receive Queue Overflow Error detected\n", + qtype); + } + if (val & PF_FW_ARQLEN_ARQCRIT_M) + dev_dbg(&pf->pdev->dev, + "%s Receive Queue Critical Error detected\n", + qtype); + val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M | + PF_FW_ARQLEN_ARQCRIT_M); + if (oldval != val) + wr32(hw, cq->rq.len, val); + } + + val = rd32(hw, cq->sq.len); + if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M | + PF_FW_ATQLEN_ATQCRIT_M)) { + oldval = val; + if (val & PF_FW_ATQLEN_ATQVFE_M) + dev_dbg(&pf->pdev->dev, + "%s Send Queue VF Error detected\n", qtype); + if (val & PF_FW_ATQLEN_ATQOVFL_M) { + dev_dbg(&pf->pdev->dev, + "%s Send Queue Overflow Error detected\n", + qtype); + } + if (val & PF_FW_ATQLEN_ATQCRIT_M) + dev_dbg(&pf->pdev->dev, + "%s Send Queue Critical Error detected\n", + qtype); + val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M | + PF_FW_ATQLEN_ATQCRIT_M); + if (oldval != val) + wr32(hw, cq->sq.len, val); + } + + event.buf_len = cq->rq_buf_size; + event.msg_buf = devm_kzalloc(&pf->pdev->dev, event.buf_len, + GFP_KERNEL); + if (!event.msg_buf) + return 0; + + do { + enum ice_status ret; + u16 opcode; + + ret = ice_clean_rq_elem(hw, cq, &event, &pending); + if (ret == ICE_ERR_AQ_NO_WORK) + break; + if (ret) { + dev_err(&pf->pdev->dev, + "%s Receive Queue event error %d\n", qtype, + ret); + break; + } + + opcode = le16_to_cpu(event.desc.opcode); + + switch (opcode) { + case ice_aqc_opc_get_link_status: + if (ice_handle_link_event(pf)) + dev_err(&pf->pdev->dev, + "Could not handle link event\n"); + break; + default: + dev_dbg(&pf->pdev->dev, + "%s Receive Queue unknown event 0x%04x ignored\n", + qtype, opcode); + break; + } + } while (pending && (i++ < ICE_DFLT_IRQ_WORK)); + + devm_kfree(&pf->pdev->dev, event.msg_buf); + + return pending && (i == ICE_DFLT_IRQ_WORK); +} + +/** + * ice_ctrlq_pending - check if there is a difference between ntc and ntu + * @hw: pointer to hardware info + * @cq: control queue information + * + * returns true if there are pending messages in a queue, false if there aren't + */ +static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + u16 ntu; + + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); + return cq->rq.next_to_clean != ntu; +} + +/** + * ice_clean_adminq_subtask - clean the AdminQ rings + * @pf: board private structure + */ +static void ice_clean_adminq_subtask(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + + if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) + return; + + if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN)) + return; + + clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); + + /* There might be a situation where new messages arrive to a control + * queue between processing the last message and clearing the + * EVENT_PENDING bit. So before exiting, check queue head again (using + * ice_ctrlq_pending) and process new messages if any. + */ + if (ice_ctrlq_pending(hw, &hw->adminq)) + __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN); + + ice_flush(hw); +} + +/** + * ice_service_task_schedule - schedule the service task to wake up + * @pf: board private structure + * + * If not already scheduled, this puts the task into the work queue. 
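+ * The __ICE_SERVICE_SCHED bit acts as the scheduled flag: it is set here
+ * with test_and_set_bit() and cleared by ice_service_task_complete(), so
+ * at most one service task is queued at a time.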
+ */ +static void ice_service_task_schedule(struct ice_pf *pf) +{ + if (!test_bit(__ICE_DOWN, pf->state) && + !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state)) + queue_work(ice_wq, &pf->serv_task); +} + +/** + * ice_service_task_complete - finish up the service task + * @pf: board private structure + */ +static void ice_service_task_complete(struct ice_pf *pf) +{ + WARN_ON(!test_bit(__ICE_SERVICE_SCHED, pf->state)); + + /* force memory (pf->state) to sync before next service task */ + smp_mb__before_atomic(); + clear_bit(__ICE_SERVICE_SCHED, pf->state); +} + +/** + * ice_service_timer - timer callback to schedule service task + * @t: pointer to timer_list + */ +static void ice_service_timer(struct timer_list *t) +{ + struct ice_pf *pf = from_timer(pf, t, serv_tmr); + + mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies)); + ice_service_task_schedule(pf); +} + +/** + * ice_service_task - manage and run subtasks + * @work: pointer to work_struct contained by the PF struct + */ +static void ice_service_task(struct work_struct *work) +{ + struct ice_pf *pf = container_of(work, struct ice_pf, serv_task); + unsigned long start_time = jiffies; + + /* subtasks */ + + /* process reset requests first */ + ice_reset_subtask(pf); + + /* bail if a reset/recovery cycle is pending */ + if (ice_is_reset_recovery_pending(pf->state) || + test_bit(__ICE_SUSPENDED, pf->state)) { + ice_service_task_complete(pf); + return; + } + + ice_sync_fltr_subtask(pf); + ice_watchdog_subtask(pf); + ice_clean_adminq_subtask(pf); + + /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */ + ice_service_task_complete(pf); + + /* If the tasks have taken longer than one service timer period + * or there is more work to be done, reset the service timer to + * schedule the service task now. + */ + if (time_after(jiffies, (start_time + pf->serv_tmr_period)) || + test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) + mod_timer(&pf->serv_tmr, jiffies); +} + +/** + * ice_set_ctrlq_len - helper function to set controlq length + * @hw: pointer to the hw instance + */ +static void ice_set_ctrlq_len(struct ice_hw *hw) +{ + hw->adminq.num_rq_entries = ICE_AQ_LEN; + hw->adminq.num_sq_entries = ICE_AQ_LEN; + hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN; + hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN; +} + +/** + * ice_irq_affinity_notify - Callback for affinity changes + * @notify: context as to what irq was changed + * @mask: the new affinity mask + * + * This is a callback function used by the irq_set_affinity_notifier function + * so that we may register to receive changes to the irq affinity masks. + */ +static void ice_irq_affinity_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct ice_q_vector *q_vector = + container_of(notify, struct ice_q_vector, affinity_notify); + + cpumask_copy(&q_vector->affinity_mask, mask); +} + +/** + * ice_irq_affinity_release - Callback for affinity notifier release + * @ref: internal core kernel usage + * + * This is a callback function used by the irq_set_affinity_notifier function + * to inform the current notification subscriber that they will no longer + * receive notifications. 
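+ * The body is intentionally empty: the notifier structure is embedded in
+ * its q_vector and is freed along with it, so there is nothing to release
+ * here beyond satisfying the callback interface.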
+ */ +static void ice_irq_affinity_release(struct kref __always_unused *ref) {} + +/** + * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI + * @vsi: the VSI being un-configured + */ +static void ice_vsi_dis_irq(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int base = vsi->base_vector; + u32 val; + int i; + + /* disable interrupt causation from each queue */ + if (vsi->tx_rings) { + ice_for_each_txq(vsi, i) { + if (vsi->tx_rings[i]) { + u16 reg; + + reg = vsi->tx_rings[i]->reg_idx; + val = rd32(hw, QINT_TQCTL(reg)); + val &= ~QINT_TQCTL_CAUSE_ENA_M; + wr32(hw, QINT_TQCTL(reg), val); + } + } + } + + if (vsi->rx_rings) { + ice_for_each_rxq(vsi, i) { + if (vsi->rx_rings[i]) { + u16 reg; + + reg = vsi->rx_rings[i]->reg_idx; + val = rd32(hw, QINT_RQCTL(reg)); + val &= ~QINT_RQCTL_CAUSE_ENA_M; + wr32(hw, QINT_RQCTL(reg), val); + } + } + } + + /* disable each interrupt */ + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + for (i = vsi->base_vector; + i < (vsi->num_q_vectors + vsi->base_vector); i++) + wr32(hw, GLINT_DYN_CTL(i), 0); + + ice_flush(hw); + for (i = 0; i < vsi->num_q_vectors; i++) + synchronize_irq(pf->msix_entries[i + base].vector); + } +} + +/** + * ice_vsi_ena_irq - Enable IRQ for the given VSI + * @vsi: the VSI being configured + */ +static int ice_vsi_ena_irq(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + int i; + + for (i = 0; i < vsi->num_q_vectors; i++) + ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]); + } + + ice_flush(hw); + return 0; +} + +/** + * ice_vsi_delete - delete a VSI from the switch + * @vsi: pointer to VSI being removed + */ +static void ice_vsi_delete(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_vsi_ctx ctxt; + enum ice_status status; + + ctxt.vsi_num = vsi->vsi_num; + + memcpy(&ctxt.info, &vsi->info, sizeof(struct ice_aqc_vsi_props)); + + status = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL); + if (status) + dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n", + vsi->vsi_num); +} + +/** + * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI + * @vsi: the VSI being configured + * @basename: name for the vector + */ +static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) +{ + int q_vectors = vsi->num_q_vectors; + struct ice_pf *pf = vsi->back; + int base = vsi->base_vector; + int rx_int_idx = 0; + int tx_int_idx = 0; + int vector, err; + int irq_num; + + for (vector = 0; vector < q_vectors; vector++) { + struct ice_q_vector *q_vector = vsi->q_vectors[vector]; + + irq_num = pf->msix_entries[base + vector].vector; + + if (q_vector->tx.ring && q_vector->rx.ring) { + snprintf(q_vector->name, sizeof(q_vector->name) - 1, + "%s-%s-%d", basename, "TxRx", rx_int_idx++); + tx_int_idx++; + } else if (q_vector->rx.ring) { + snprintf(q_vector->name, sizeof(q_vector->name) - 1, + "%s-%s-%d", basename, "rx", rx_int_idx++); + } else if (q_vector->tx.ring) { + snprintf(q_vector->name, sizeof(q_vector->name) - 1, + "%s-%s-%d", basename, "tx", tx_int_idx++); + } else { + /* skip this unused q_vector */ + continue; + } + err = devm_request_irq(&pf->pdev->dev, + pf->msix_entries[base + vector].vector, + vsi->irq_handler, 0, q_vector->name, + q_vector); + if (err) { + netdev_err(vsi->netdev, + "MSIX request_irq failed, error: %d\n", err); + goto free_q_irqs; + } + + /* register for affinity change notifications */ + q_vector->affinity_notify.notify = ice_irq_affinity_notify; + 
q_vector->affinity_notify.release = ice_irq_affinity_release; + irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); + + /* assign the mask for this irq */ + irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); + } + + vsi->irqs_ready = true; + return 0; + +free_q_irqs: + while (vector) { + vector--; + irq_num = pf->msix_entries[base + vector].vector, + irq_set_affinity_notifier(irq_num, NULL); + irq_set_affinity_hint(irq_num, NULL); + devm_free_irq(&pf->pdev->dev, irq_num, &vsi->q_vectors[vector]); + } + return err; +} + +/** + * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type + * @vsi: the VSI being configured + */ +static void ice_vsi_set_rss_params(struct ice_vsi *vsi) +{ + struct ice_hw_common_caps *cap; + struct ice_pf *pf = vsi->back; + + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + vsi->rss_size = 1; + return; + } + + cap = &pf->hw.func_caps.common_cap; + switch (vsi->type) { + case ICE_VSI_PF: + /* PF VSI will inherit RSS instance of PF */ + vsi->rss_table_size = cap->rss_table_size; + vsi->rss_size = min_t(int, num_online_cpus(), + BIT(cap->rss_table_entry_width)); + vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF; + break; + default: + dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); + break; + } +} + +/** + * ice_vsi_setup_q_map - Setup a VSI queue map + * @vsi: the VSI being configured + * @ctxt: VSI context structure + */ +static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) +{ + u16 offset = 0, qmap = 0, numq_tc; + u16 pow = 0, max_rss = 0, qcount; + u16 qcount_tx = vsi->alloc_txq; + u16 qcount_rx = vsi->alloc_rxq; + bool ena_tc0 = false; + int i; + + /* at least TC0 should be enabled by default */ + if (vsi->tc_cfg.numtc) { + if (!(vsi->tc_cfg.ena_tc & BIT(0))) + ena_tc0 = true; + } else { + ena_tc0 = true; + } + + if (ena_tc0) { + vsi->tc_cfg.numtc++; + vsi->tc_cfg.ena_tc |= 1; + } + + numq_tc = qcount_rx / vsi->tc_cfg.numtc; + + /* TC mapping is a function of the number of Rx queues assigned to the + * VSI for each traffic class and the offset of these queues. + * The first 10 bits are for queue offset for TC0, next 4 bits for no:of + * queues allocated to TC0. No:of queues is a power-of-2. + * + * If TC is not enabled, the queue offset is set to 0, and allocate one + * queue, this way, traffic for the given TC will be sent to the default + * queue. 
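+ * For example, an enabled TC with queue offset 0 and qcount 8 (so
+ * pow = order_base_2(8) = 3) is encoded below as
+ * qmap = (0 << ICE_AQ_VSI_TC_Q_OFFSET_S) | (3 << ICE_AQ_VSI_TC_Q_NUM_S).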
+ * + * Setup number and offset of Rx queues for all TCs for the VSI + */ + + /* qcount will change if RSS is enabled */ + if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) { + if (vsi->type == ICE_VSI_PF) + max_rss = ICE_MAX_LG_RSS_QS; + else + max_rss = ICE_MAX_SMALL_RSS_QS; + + qcount = min_t(int, numq_tc, max_rss); + qcount = min_t(int, qcount, vsi->rss_size); + } else { + qcount = numq_tc; + } + + /* find the (rounded up) power-of-2 of qcount */ + pow = order_base_2(qcount); + + for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { + if (!(vsi->tc_cfg.ena_tc & BIT(i))) { + /* TC is not enabled */ + vsi->tc_cfg.tc_info[i].qoffset = 0; + vsi->tc_cfg.tc_info[i].qcount = 1; + ctxt->info.tc_mapping[i] = 0; + continue; + } + + /* TC is enabled */ + vsi->tc_cfg.tc_info[i].qoffset = offset; + vsi->tc_cfg.tc_info[i].qcount = qcount; + + qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & + ICE_AQ_VSI_TC_Q_OFFSET_M) | + ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & + ICE_AQ_VSI_TC_Q_NUM_M); + offset += qcount; + ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); + } + + vsi->num_txq = qcount_tx; + vsi->num_rxq = offset; + + /* Rx queue mapping */ + ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG); + /* q_mapping buffer holds the info for the first queue allocated for + * this VSI in the PF space and also the number of queues associated + * with this VSI. + */ + ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]); + ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq); +} + +/** + * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI + * @ctxt: the VSI context being set + * + * This initializes a default VSI context for all sections except the Queues. + */ +static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) +{ + u32 table = 0; + + memset(&ctxt->info, 0, sizeof(ctxt->info)); + /* VSI's should be allocated from shared pool */ + ctxt->alloc_from_pool = true; + /* Src pruning enabled by default */ + ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; + /* Traffic from VSI can be sent to LAN */ + ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; + + /* By default bits 3 and 4 in vlan_flags are 0's which results in legacy + * behavior (show VLAN, DEI, and UP) in descriptor. Also, allow all + * packets untagged/tagged. 
+ */ + ctxt->info.vlan_flags = ((ICE_AQ_VSI_VLAN_MODE_ALL & + ICE_AQ_VSI_VLAN_MODE_M) >> + ICE_AQ_VSI_VLAN_MODE_S); + + /* Have 1:1 UP mapping for both ingress/egress tables */ + table |= ICE_UP_TABLE_TRANSLATE(0, 0); + table |= ICE_UP_TABLE_TRANSLATE(1, 1); + table |= ICE_UP_TABLE_TRANSLATE(2, 2); + table |= ICE_UP_TABLE_TRANSLATE(3, 3); + table |= ICE_UP_TABLE_TRANSLATE(4, 4); + table |= ICE_UP_TABLE_TRANSLATE(5, 5); + table |= ICE_UP_TABLE_TRANSLATE(6, 6); + table |= ICE_UP_TABLE_TRANSLATE(7, 7); + ctxt->info.ingress_table = cpu_to_le32(table); + ctxt->info.egress_table = cpu_to_le32(table); + /* Have 1:1 UP mapping for outer to inner UP table */ + ctxt->info.outer_up_table = cpu_to_le32(table); + /* No Outer tag support outer_tag_flags remains to zero */ +} + +/** + * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI + * @ctxt: the VSI context being set + * @vsi: the VSI being configured + */ +static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) +{ + u8 lut_type, hash_type; + + switch (vsi->type) { + case ICE_VSI_PF: + /* PF VSI will inherit RSS instance of PF */ + lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; + hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; + break; + default: + dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n", + vsi->type); + return; + } + + ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & + ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | + ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) & + ICE_AQ_VSI_Q_OPT_RSS_HASH_M); +} + +/** + * ice_vsi_add - Create a new VSI or fetch preallocated VSI + * @vsi: the VSI being configured + * + * This initializes a VSI context depending on the VSI type to be added and + * passes it down to the add_vsi aq command to create a new VSI. + */ +static int ice_vsi_add(struct ice_vsi *vsi) +{ + struct ice_vsi_ctx ctxt = { 0 }; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int ret = 0; + + switch (vsi->type) { + case ICE_VSI_PF: + ctxt.flags = ICE_AQ_VSI_TYPE_PF; + break; + default: + return -ENODEV; + } + + ice_set_dflt_vsi_ctx(&ctxt); + /* if the switch is in VEB mode, allow VSI loopback */ + if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB) + ctxt.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB; + + /* Set LUT type and HASH type if RSS is enabled */ + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + ice_set_rss_vsi_ctx(&ctxt, vsi); + + ctxt.info.sw_id = vsi->port_info->sw_id; + ice_vsi_setup_q_map(vsi, &ctxt); + + ret = ice_aq_add_vsi(hw, &ctxt, NULL); + if (ret) { + dev_err(&vsi->back->pdev->dev, + "Add VSI AQ call failed, err %d\n", ret); + return -EIO; + } + vsi->info = ctxt.info; + vsi->vsi_num = ctxt.vsi_num; + + return ret; +} + +/** + * ice_vsi_release_msix - Clear the queue to Interrupt mapping in HW + * @vsi: the VSI being cleaned up + */ +static void ice_vsi_release_msix(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + u16 vector = vsi->base_vector; + struct ice_hw *hw = &pf->hw; + u32 txq = 0; + u32 rxq = 0; + int i, q; + + for (i = 0; i < vsi->num_q_vectors; i++, vector++) { + struct ice_q_vector *q_vector = vsi->q_vectors[i]; + + wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), 0); + wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), 0); + for (q = 0; q < q_vector->num_ring_tx; q++) { + wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); + txq++; + } + + for (q = 0; q < q_vector->num_ring_rx; q++) { + wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0); + rxq++; + } + } + + ice_flush(hw); +} + +/** + * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI + * @vsi: the VSI having rings deallocated + */ 
+static void ice_vsi_clear_rings(struct ice_vsi *vsi) +{ + int i; + + if (vsi->tx_rings) { + for (i = 0; i < vsi->alloc_txq; i++) { + if (vsi->tx_rings[i]) { + kfree_rcu(vsi->tx_rings[i], rcu); + vsi->tx_rings[i] = NULL; + } + } + } + if (vsi->rx_rings) { + for (i = 0; i < vsi->alloc_rxq; i++) { + if (vsi->rx_rings[i]) { + kfree_rcu(vsi->rx_rings[i], rcu); + vsi->rx_rings[i] = NULL; + } + } + } +} + +/** + * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI + * @vsi: VSI which is having rings allocated + */ +static int ice_vsi_alloc_rings(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int i; + + /* Allocate tx_rings */ + for (i = 0; i < vsi->alloc_txq; i++) { + struct ice_ring *ring; + + /* allocate with kzalloc(), free with kfree_rcu() */ + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + + if (!ring) + goto err_out; + + ring->q_index = i; + ring->reg_idx = vsi->txq_map[i]; + ring->ring_active = false; + ring->vsi = vsi; + ring->netdev = vsi->netdev; + ring->dev = &pf->pdev->dev; + ring->count = vsi->num_desc; + + vsi->tx_rings[i] = ring; + } + + /* Allocate rx_rings */ + for (i = 0; i < vsi->alloc_rxq; i++) { + struct ice_ring *ring; + + /* allocate with kzalloc(), free with kfree_rcu() */ + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) + goto err_out; + + ring->q_index = i; + ring->reg_idx = vsi->rxq_map[i]; + ring->ring_active = false; + ring->vsi = vsi; + ring->netdev = vsi->netdev; + ring->dev = &pf->pdev->dev; + ring->count = vsi->num_desc; + vsi->rx_rings[i] = ring; + } + + return 0; + +err_out: + ice_vsi_clear_rings(vsi); + return -ENOMEM; +} + +/** + * ice_vsi_free_irq - Free the irq association with the OS + * @vsi: the VSI being configured + */ +static void ice_vsi_free_irq(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int base = vsi->base_vector; + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + int i; + + if (!vsi->q_vectors || !vsi->irqs_ready) + return; + + vsi->irqs_ready = false; + for (i = 0; i < vsi->num_q_vectors; i++) { + u16 vector = i + base; + int irq_num; + + irq_num = pf->msix_entries[vector].vector; + + /* free only the irqs that were actually requested */ + if (!vsi->q_vectors[i] || + !(vsi->q_vectors[i]->num_ring_tx || + vsi->q_vectors[i]->num_ring_rx)) + continue; + + /* clear the affinity notifier in the IRQ descriptor */ + irq_set_affinity_notifier(irq_num, NULL); + + /* clear the affinity_mask in the IRQ descriptor */ + irq_set_affinity_hint(irq_num, NULL); + synchronize_irq(irq_num); + devm_free_irq(&pf->pdev->dev, irq_num, + vsi->q_vectors[i]); + } + ice_vsi_release_msix(vsi); + } +} + +/** + * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW + * @vsi: the VSI being configured + */ +static void ice_vsi_cfg_msix(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + u16 vector = vsi->base_vector; + struct ice_hw *hw = &pf->hw; + u32 txq = 0, rxq = 0; + int i, q, itr; + u8 itr_gran; + + for (i = 0; i < vsi->num_q_vectors; i++, vector++) { + struct ice_q_vector *q_vector = vsi->q_vectors[i]; + + itr_gran = hw->itr_gran_200; + + if (q_vector->num_ring_rx) { + q_vector->rx.itr = + ITR_TO_REG(vsi->rx_rings[rxq]->rx_itr_setting, + itr_gran); + q_vector->rx.latency_range = ICE_LOW_LATENCY; + } + + if (q_vector->num_ring_tx) { + q_vector->tx.itr = + ITR_TO_REG(vsi->tx_rings[txq]->tx_itr_setting, + itr_gran); + q_vector->tx.latency_range = ICE_LOW_LATENCY; + } + wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), q_vector->rx.itr); + wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), q_vector->tx.itr); + + /* Both Transmit Queue 
Interrupt Cause Control register + * and Receive Queue Interrupt Cause control register + * expects MSIX_INDX field to be the vector index + * within the function space and not the absolute + * vector index across PF or across device. + * For SR-IOV VF VSIs queue vector index always starts + * with 1 since first vector index(0) is used for OICR + * in VF space. Since VMDq and other PF VSIs are withtin + * the PF function space, use the vector index thats + * tracked for this PF. + */ + for (q = 0; q < q_vector->num_ring_tx; q++) { + u32 val; + + itr = ICE_TX_ITR; + val = QINT_TQCTL_CAUSE_ENA_M | + (itr << QINT_TQCTL_ITR_INDX_S) | + (vector << QINT_TQCTL_MSIX_INDX_S); + wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); + txq++; + } + + for (q = 0; q < q_vector->num_ring_rx; q++) { + u32 val; + + itr = ICE_RX_ITR; + val = QINT_RQCTL_CAUSE_ENA_M | + (itr << QINT_RQCTL_ITR_INDX_S) | + (vector << QINT_RQCTL_MSIX_INDX_S); + wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); + rxq++; + } + } + + ice_flush(hw); +} + +/** + * ice_ena_misc_vector - enable the non-queue interrupts + * @pf: board private structure + */ +static void ice_ena_misc_vector(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + u32 val; + + /* clear things first */ + wr32(hw, PFINT_OICR_ENA, 0); /* disable all */ + rd32(hw, PFINT_OICR); /* read to clear */ + + val = (PFINT_OICR_ECC_ERR_M | + PFINT_OICR_MAL_DETECT_M | + PFINT_OICR_GRST_M | + PFINT_OICR_PCI_EXCEPTION_M | + PFINT_OICR_HMC_ERR_M | + PFINT_OICR_PE_CRITERR_M); + + wr32(hw, PFINT_OICR_ENA, val); + + /* SW_ITR_IDX = 0, but don't change INTENA */ + wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), + GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); +} + +/** + * ice_misc_intr - misc interrupt handler + * @irq: interrupt number + * @data: pointer to a q_vector + */ +static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) +{ + struct ice_pf *pf = (struct ice_pf *)data; + struct ice_hw *hw = &pf->hw; + irqreturn_t ret = IRQ_NONE; + u32 oicr, ena_mask; + + set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); + + oicr = rd32(hw, PFINT_OICR); + ena_mask = rd32(hw, PFINT_OICR_ENA); + + if (oicr & PFINT_OICR_GRST_M) { + u32 reset; + /* we have a reset warning */ + ena_mask &= ~PFINT_OICR_GRST_M; + reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> + GLGEN_RSTAT_RESET_TYPE_S; + + if (reset == ICE_RESET_CORER) + pf->corer_count++; + else if (reset == ICE_RESET_GLOBR) + pf->globr_count++; + else + pf->empr_count++; + + /* If a reset cycle isn't already in progress, we set a bit in + * pf->state so that the service task can start a reset/rebuild. + * We also make note of which reset happened so that peer + * devices/drivers can be informed. + */ + if (!test_and_set_bit(__ICE_RESET_RECOVERY_PENDING, + pf->state)) { + if (reset == ICE_RESET_CORER) + set_bit(__ICE_CORER_RECV, pf->state); + else if (reset == ICE_RESET_GLOBR) + set_bit(__ICE_GLOBR_RECV, pf->state); + else + set_bit(__ICE_EMPR_RECV, pf->state); + + /* There are couple of different bits at play here. + * hw->reset_ongoing indicates whether the hardware is + * in reset. This is set to true when a reset interrupt + * is received and set back to false after the driver + * has determined that the hardware is out of reset. + * + * __ICE_RESET_RECOVERY_PENDING in pf->state indicates + * that a post reset rebuild is required before the + * driver is operational again. This is set above. + * + * As this is the start of the reset/rebuild cycle, set + * both to indicate that. 
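+ * (__ICE_RESET_RECOVERY_PENDING was already set by the
+ * test_and_set_bit() call above, so only hw->reset_ongoing needs to be
+ * set below.)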
+ */ + hw->reset_ongoing = true; + } + } + + if (oicr & PFINT_OICR_HMC_ERR_M) { + ena_mask &= ~PFINT_OICR_HMC_ERR_M; + dev_dbg(&pf->pdev->dev, + "HMC Error interrupt - info 0x%x, data 0x%x\n", + rd32(hw, PFHMC_ERRORINFO), + rd32(hw, PFHMC_ERRORDATA)); + } + + /* Report and mask off any remaining unexpected interrupts */ + oicr &= ena_mask; + if (oicr) { + dev_dbg(&pf->pdev->dev, "unhandled interrupt oicr=0x%08x\n", + oicr); + /* If a critical error is pending there is no choice but to + * reset the device. + */ + if (oicr & (PFINT_OICR_PE_CRITERR_M | + PFINT_OICR_PCI_EXCEPTION_M | + PFINT_OICR_ECC_ERR_M)) { + set_bit(__ICE_PFR_REQ, pf->state); + ice_service_task_schedule(pf); + } + ena_mask &= ~oicr; + } + ret = IRQ_HANDLED; + + /* re-enable interrupt causes that are not handled during this pass */ + wr32(hw, PFINT_OICR_ENA, ena_mask); + if (!test_bit(__ICE_DOWN, pf->state)) { + ice_service_task_schedule(pf); + ice_irq_dynamic_ena(hw, NULL, NULL); + } + + return ret; +} + +/** + * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors + * @vsi: the VSI being configured + * + * This function maps descriptor rings to the queue-specific vectors allotted + * through the MSI-X enabling code. On a constrained vector budget, we map Tx + * and Rx rings to the vector as "efficiently" as possible. + */ +static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) +{ + int q_vectors = vsi->num_q_vectors; + int tx_rings_rem, rx_rings_rem; + int v_id; + + /* initially assigning remaining rings count to VSIs num queue value */ + tx_rings_rem = vsi->num_txq; + rx_rings_rem = vsi->num_rxq; + + for (v_id = 0; v_id < q_vectors; v_id++) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_id]; + int tx_rings_per_v, rx_rings_per_v, q_id, q_base; + + /* Tx rings mapping to vector */ + tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id); + q_vector->num_ring_tx = tx_rings_per_v; + q_vector->tx.ring = NULL; + q_base = vsi->num_txq - tx_rings_rem; + + for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) { + struct ice_ring *tx_ring = vsi->tx_rings[q_id]; + + tx_ring->q_vector = q_vector; + tx_ring->next = q_vector->tx.ring; + q_vector->tx.ring = tx_ring; + } + tx_rings_rem -= tx_rings_per_v; + + /* Rx rings mapping to vector */ + rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id); + q_vector->num_ring_rx = rx_rings_per_v; + q_vector->rx.ring = NULL; + q_base = vsi->num_rxq - rx_rings_rem; + + for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) { + struct ice_ring *rx_ring = vsi->rx_rings[q_id]; + + rx_ring->q_vector = q_vector; + rx_ring->next = q_vector->rx.ring; + q_vector->rx.ring = rx_ring; + } + rx_rings_rem -= rx_rings_per_v; + } +} + +/** + * ice_vsi_set_num_qs - Set num queues, descriptors and vectors for a VSI + * @vsi: the VSI being configured + * + * Return 0 on success and a negative value on error + */ +static void ice_vsi_set_num_qs(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + + switch (vsi->type) { + case ICE_VSI_PF: + vsi->alloc_txq = pf->num_lan_tx; + vsi->alloc_rxq = pf->num_lan_rx; + vsi->num_desc = ALIGN(ICE_DFLT_NUM_DESC, ICE_REQ_DESC_MULTIPLE); + vsi->num_q_vectors = max_t(int, pf->num_lan_rx, pf->num_lan_tx); + break; + default: + dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n", + vsi->type); + break; + } +} + +/** + * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi + * @vsi: VSI pointer + * @alloc_qvectors: a bool to specify if q_vectors need to be allocated. 
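+ * Passing false keeps the existing q_vector pointer array, as is done
+ * when a VSI is rebuilt after a reset (see ice_vsi_reinit_setup()).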
+ * + * On error: returns error code (negative) + * On success: returns 0 + */ +static int ice_vsi_alloc_arrays(struct ice_vsi *vsi, bool alloc_qvectors) +{ + struct ice_pf *pf = vsi->back; + + /* allocate memory for both Tx and Rx ring pointers */ + vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, + sizeof(struct ice_ring *), GFP_KERNEL); + if (!vsi->tx_rings) + goto err_txrings; + + vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, + sizeof(struct ice_ring *), GFP_KERNEL); + if (!vsi->rx_rings) + goto err_rxrings; + + if (alloc_qvectors) { + /* allocate memory for q_vector pointers */ + vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, + vsi->num_q_vectors, + sizeof(struct ice_q_vector *), + GFP_KERNEL); + if (!vsi->q_vectors) + goto err_vectors; + } + + return 0; + +err_vectors: + devm_kfree(&pf->pdev->dev, vsi->rx_rings); +err_rxrings: + devm_kfree(&pf->pdev->dev, vsi->tx_rings); +err_txrings: + return -ENOMEM; +} + +/** + * ice_msix_clean_rings - MSIX mode Interrupt Handler + * @irq: interrupt number + * @data: pointer to a q_vector + */ +static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) +{ + struct ice_q_vector *q_vector = (struct ice_q_vector *)data; + + if (!q_vector->tx.ring && !q_vector->rx.ring) + return IRQ_HANDLED; + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * ice_vsi_alloc - Allocates the next available struct vsi in the PF + * @pf: board private structure + * @type: type of VSI + * + * returns a pointer to a VSI on success, NULL on failure. + */ +static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type) +{ + struct ice_vsi *vsi = NULL; + + /* Need to protect the allocation of the VSIs at the PF level */ + mutex_lock(&pf->sw_mutex); + + /* If we have already allocated our maximum number of VSIs, + * pf->next_vsi will be ICE_NO_VSI. 
If not, pf->next_vsi index + * is available to be populated + */ + if (pf->next_vsi == ICE_NO_VSI) { + dev_dbg(&pf->pdev->dev, "out of VSI slots!\n"); + goto unlock_pf; + } + + vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL); + if (!vsi) + goto unlock_pf; + + vsi->type = type; + vsi->back = pf; + set_bit(__ICE_DOWN, vsi->state); + vsi->idx = pf->next_vsi; + vsi->work_lmt = ICE_DFLT_IRQ_WORK; + + ice_vsi_set_num_qs(vsi); + + switch (vsi->type) { + case ICE_VSI_PF: + if (ice_vsi_alloc_arrays(vsi, true)) + goto err_rings; + + /* Setup default MSIX irq handler for VSI */ + vsi->irq_handler = ice_msix_clean_rings; + break; + default: + dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); + goto unlock_pf; + } + + /* fill VSI slot in the PF struct */ + pf->vsi[pf->next_vsi] = vsi; + + /* prepare pf->next_vsi for next use */ + pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi, + pf->next_vsi); + goto unlock_pf; + +err_rings: + devm_kfree(&pf->pdev->dev, vsi); + vsi = NULL; +unlock_pf: + mutex_unlock(&pf->sw_mutex); + return vsi; +} + +/** + * ice_free_irq_msix_misc - Unroll misc vector setup + * @pf: board private structure + */ +static void ice_free_irq_msix_misc(struct ice_pf *pf) +{ + /* disable OICR interrupt */ + wr32(&pf->hw, PFINT_OICR_ENA, 0); + ice_flush(&pf->hw); + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) { + synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); + devm_free_irq(&pf->pdev->dev, + pf->msix_entries[pf->oicr_idx].vector, pf); + } + + ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID); +} + +/** + * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events + * @pf: board private structure + * + * This sets up the handler for MSIX 0, which is used to manage the + * non-queue interrupts, e.g. AdminQ and errors. This is not used + * when in MSI or Legacy interrupt mode. + */ +static int ice_req_irq_msix_misc(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + int oicr_idx, err = 0; + u8 itr_gran; + u32 val; + + if (!pf->int_name[0]) + snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc", + dev_driver_string(&pf->pdev->dev), + dev_name(&pf->pdev->dev)); + + /* Do not request IRQ but do enable OICR interrupt since settings are + * lost during reset. Note that this function is called only during + * rebuild path and not while reset is in progress. 
+ */ + if (ice_is_reset_recovery_pending(pf->state)) + goto skip_req_irq; + + /* reserve one vector in irq_tracker for misc interrupts */ + oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); + if (oicr_idx < 0) + return oicr_idx; + + pf->oicr_idx = oicr_idx; + + err = devm_request_irq(&pf->pdev->dev, + pf->msix_entries[pf->oicr_idx].vector, + ice_misc_intr, 0, pf->int_name, pf); + if (err) { + dev_err(&pf->pdev->dev, + "devm_request_irq for %s failed: %d\n", + pf->int_name, err); + ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); + return err; + } + +skip_req_irq: + ice_ena_misc_vector(pf); + + val = ((pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | + PFINT_OICR_CTL_CAUSE_ENA_M); + wr32(hw, PFINT_OICR_CTL, val); + + /* This enables Admin queue Interrupt causes */ + val = ((pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) | + PFINT_FW_CTL_CAUSE_ENA_M); + wr32(hw, PFINT_FW_CTL, val); + + itr_gran = hw->itr_gran_200; + + wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx), + ITR_TO_REG(ICE_ITR_8K, itr_gran)); + + ice_flush(hw); + ice_irq_dynamic_ena(hw, NULL, NULL); + + return 0; +} + +/** + * ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI + * @vsi: the VSI getting queues + * + * Return 0 on success and a negative value on error + */ +static int ice_vsi_get_qs_contig(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int offset, ret = 0; + + mutex_lock(&pf->avail_q_mutex); + /* look for contiguous block of queues for tx */ + offset = bitmap_find_next_zero_area(pf->avail_txqs, ICE_MAX_TXQS, + 0, vsi->alloc_txq, 0); + if (offset < ICE_MAX_TXQS) { + int i; + + bitmap_set(pf->avail_txqs, offset, vsi->alloc_txq); + for (i = 0; i < vsi->alloc_txq; i++) + vsi->txq_map[i] = i + offset; + } else { + ret = -ENOMEM; + vsi->tx_mapping_mode = ICE_VSI_MAP_SCATTER; + } + + /* look for contiguous block of queues for rx */ + offset = bitmap_find_next_zero_area(pf->avail_rxqs, ICE_MAX_RXQS, + 0, vsi->alloc_rxq, 0); + if (offset < ICE_MAX_RXQS) { + int i; + + bitmap_set(pf->avail_rxqs, offset, vsi->alloc_rxq); + for (i = 0; i < vsi->alloc_rxq; i++) + vsi->rxq_map[i] = i + offset; + } else { + ret = -ENOMEM; + vsi->rx_mapping_mode = ICE_VSI_MAP_SCATTER; + } + mutex_unlock(&pf->avail_q_mutex); + + return ret; +} + +/** + * ice_vsi_get_qs_scatter - Assign a scattered queues to VSI + * @vsi: the VSI getting queues + * + * Return 0 on success and a negative value on error + */ +static int ice_vsi_get_qs_scatter(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int i, index = 0; + + mutex_lock(&pf->avail_q_mutex); + + if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) { + for (i = 0; i < vsi->alloc_txq; i++) { + index = find_next_zero_bit(pf->avail_txqs, + ICE_MAX_TXQS, index); + if (index < ICE_MAX_TXQS) { + set_bit(index, pf->avail_txqs); + vsi->txq_map[i] = index; + } else { + goto err_scatter_tx; + } + } + } + + if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) { + for (i = 0; i < vsi->alloc_rxq; i++) { + index = find_next_zero_bit(pf->avail_rxqs, + ICE_MAX_RXQS, index); + if (index < ICE_MAX_RXQS) { + set_bit(index, pf->avail_rxqs); + vsi->rxq_map[i] = index; + } else { + goto err_scatter_rx; + } + } + } + + mutex_unlock(&pf->avail_q_mutex); + return 0; + +err_scatter_rx: + /* unflag any queues we have grabbed (i is failed position) */ + for (index = 0; index < i; index++) { + clear_bit(vsi->rxq_map[index], pf->avail_rxqs); + vsi->rxq_map[index] = 0; + } + i = vsi->alloc_txq; +err_scatter_tx: + /* i is either position of failed attempt or vsi->alloc_txq */ + for (index = 
0; index < i; index++) { + clear_bit(vsi->txq_map[index], pf->avail_txqs); + vsi->txq_map[index] = 0; + } + + mutex_unlock(&pf->avail_q_mutex); + return -ENOMEM; +} + +/** + * ice_vsi_get_qs - Assign queues from PF to VSI + * @vsi: the VSI to assign queues to + * + * Returns 0 on success and a negative value on error + */ +static int ice_vsi_get_qs(struct ice_vsi *vsi) +{ + int ret = 0; + + vsi->tx_mapping_mode = ICE_VSI_MAP_CONTIG; + vsi->rx_mapping_mode = ICE_VSI_MAP_CONTIG; + + /* NOTE: ice_vsi_get_qs_contig() will set the rx/tx mapping + * modes individually to scatter if assigning contiguous queues + * to rx or tx fails + */ + ret = ice_vsi_get_qs_contig(vsi); + if (ret < 0) { + if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) + vsi->alloc_txq = max_t(u16, vsi->alloc_txq, + ICE_MAX_SCATTER_TXQS); + if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) + vsi->alloc_rxq = max_t(u16, vsi->alloc_rxq, + ICE_MAX_SCATTER_RXQS); + ret = ice_vsi_get_qs_scatter(vsi); + } + + return ret; +} + +/** + * ice_vsi_put_qs - Release queues from VSI to PF + * @vsi: the VSI thats going to release queues + */ +static void ice_vsi_put_qs(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int i; + + mutex_lock(&pf->avail_q_mutex); + + for (i = 0; i < vsi->alloc_txq; i++) { + clear_bit(vsi->txq_map[i], pf->avail_txqs); + vsi->txq_map[i] = ICE_INVAL_Q_INDEX; + } + + for (i = 0; i < vsi->alloc_rxq; i++) { + clear_bit(vsi->rxq_map[i], pf->avail_rxqs); + vsi->rxq_map[i] = ICE_INVAL_Q_INDEX; + } + + mutex_unlock(&pf->avail_q_mutex); +} + +/** + * ice_free_q_vector - Free memory allocated for a specific interrupt vector + * @vsi: VSI having the memory freed + * @v_idx: index of the vector to be freed + */ +static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) +{ + struct ice_q_vector *q_vector; + struct ice_ring *ring; + + if (!vsi->q_vectors[v_idx]) { + dev_dbg(&vsi->back->pdev->dev, "Queue vector at index %d not found\n", + v_idx); + return; + } + q_vector = vsi->q_vectors[v_idx]; + + ice_for_each_ring(ring, q_vector->tx) + ring->q_vector = NULL; + ice_for_each_ring(ring, q_vector->rx) + ring->q_vector = NULL; + + /* only VSI with an associated netdev is set up with NAPI */ + if (vsi->netdev) + netif_napi_del(&q_vector->napi); + + devm_kfree(&vsi->back->pdev->dev, q_vector); + vsi->q_vectors[v_idx] = NULL; +} + +/** + * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors + * @vsi: the VSI having memory freed + */ +static void ice_vsi_free_q_vectors(struct ice_vsi *vsi) +{ + int v_idx; + + for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) + ice_free_q_vector(vsi, v_idx); +} + +/** + * ice_cfg_netdev - Setup the netdev flags + * @vsi: the VSI being configured + * + * Returns 0 on success, negative value on failure + */ +static int ice_cfg_netdev(struct ice_vsi *vsi) +{ + netdev_features_t csumo_features; + netdev_features_t vlano_features; + netdev_features_t dflt_features; + netdev_features_t tso_features; + struct ice_netdev_priv *np; + struct net_device *netdev; + u8 mac_addr[ETH_ALEN]; + + netdev = alloc_etherdev_mqs(sizeof(struct ice_netdev_priv), + vsi->alloc_txq, vsi->alloc_rxq); + if (!netdev) + return -ENOMEM; + + vsi->netdev = netdev; + np = netdev_priv(netdev); + np->vsi = vsi; + + dflt_features = NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_RXHASH; + + csumo_features = NETIF_F_RXCSUM | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM; + + vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + + tso_features = NETIF_F_TSO; + + /* set 
features that user can change */ + netdev->hw_features = dflt_features | csumo_features | + vlano_features | tso_features; + + /* enable features */ + netdev->features |= netdev->hw_features; + /* encap and VLAN devices inherit default, csumo and tso features */ + netdev->hw_enc_features |= dflt_features | csumo_features | + tso_features; + netdev->vlan_features |= dflt_features | csumo_features | + tso_features; + + if (vsi->type == ICE_VSI_PF) { + SET_NETDEV_DEV(netdev, &vsi->back->pdev->dev); + ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr); + + ether_addr_copy(netdev->dev_addr, mac_addr); + ether_addr_copy(netdev->perm_addr, mac_addr); + } + + netdev->priv_flags |= IFF_UNICAST_FLT; + + /* assign netdev_ops */ + netdev->netdev_ops = &ice_netdev_ops; + + /* setup watchdog timeout value to be 5 second */ + netdev->watchdog_timeo = 5 * HZ; + + ice_set_ethtool_ops(netdev); + + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = ICE_MAX_MTU; + + return 0; +} + +/** + * ice_vsi_free_arrays - clean up vsi resources + * @vsi: pointer to VSI being cleared + * @free_qvectors: bool to specify if q_vectors should be deallocated + */ +static void ice_vsi_free_arrays(struct ice_vsi *vsi, bool free_qvectors) +{ + struct ice_pf *pf = vsi->back; + + /* free the ring and vector containers */ + if (free_qvectors && vsi->q_vectors) { + devm_kfree(&pf->pdev->dev, vsi->q_vectors); + vsi->q_vectors = NULL; + } + if (vsi->tx_rings) { + devm_kfree(&pf->pdev->dev, vsi->tx_rings); + vsi->tx_rings = NULL; + } + if (vsi->rx_rings) { + devm_kfree(&pf->pdev->dev, vsi->rx_rings); + vsi->rx_rings = NULL; + } +} + +/** + * ice_vsi_clear - clean up and deallocate the provided vsi + * @vsi: pointer to VSI being cleared + * + * This deallocates the vsi's queue resources, removes it from the PF's + * VSI array if necessary, and deallocates the VSI + * + * Returns 0 on success, negative on failure + */ +static int ice_vsi_clear(struct ice_vsi *vsi) +{ + struct ice_pf *pf = NULL; + + if (!vsi) + return 0; + + if (!vsi->back) + return -EINVAL; + + pf = vsi->back; + + if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) { + dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n", + vsi->idx); + return -EINVAL; + } + + mutex_lock(&pf->sw_mutex); + /* updates the PF for this cleared vsi */ + + pf->vsi[vsi->idx] = NULL; + if (vsi->idx < pf->next_vsi) + pf->next_vsi = vsi->idx; + + ice_vsi_free_arrays(vsi, true); + mutex_unlock(&pf->sw_mutex); + devm_kfree(&pf->pdev->dev, vsi); + + return 0; +} + +/** + * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector + * @vsi: the VSI being configured + * @v_idx: index of the vector in the vsi struct + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. 
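+ * The vector's affinity_mask is seeded with CPU v_idx when that CPU is
+ * online, and NAPI is registered only for VSIs that have a netdev.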
+ */ +static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) +{ + struct ice_pf *pf = vsi->back; + struct ice_q_vector *q_vector; + + /* allocate q_vector */ + q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + + q_vector->vsi = vsi; + q_vector->v_idx = v_idx; + /* only set affinity_mask if the CPU is online */ + if (cpu_online(v_idx)) + cpumask_set_cpu(v_idx, &q_vector->affinity_mask); + + if (vsi->netdev) + netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll, + NAPI_POLL_WEIGHT); + /* tie q_vector and vsi together */ + vsi->q_vectors[v_idx] = q_vector; + + return 0; +} + +/** + * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors + * @vsi: the VSI being configured + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. + */ +static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int v_idx = 0, num_q_vectors; + int err; + + if (vsi->q_vectors[0]) { + dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n", + vsi->vsi_num); + return -EEXIST; + } + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + num_q_vectors = vsi->num_q_vectors; + } else { + err = -EINVAL; + goto err_out; + } + + for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { + err = ice_vsi_alloc_q_vector(vsi, v_idx); + if (err) + goto err_out; + } + + return 0; + +err_out: + while (v_idx--) + ice_free_q_vector(vsi, v_idx); + + dev_err(&pf->pdev->dev, + "Failed to allocate %d q_vector for VSI %d, ret=%d\n", + vsi->num_q_vectors, vsi->vsi_num, err); + vsi->num_q_vectors = 0; + return err; +} + +/** + * ice_vsi_setup_vector_base - Set up the base vector for the given VSI + * @vsi: ptr to the VSI + * + * This should only be called after ice_vsi_alloc() which allocates the + * corresponding SW VSI structure and initializes num_queue_pairs for the + * newly allocated VSI. 
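+ * The reservation is made in pf->irq_tracker: num_q_vectors entries are
+ * claimed and the index of the first one is stored in vsi->base_vector.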
+ * + * Returns 0 on success or negative on failure + */ +static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int num_q_vectors = 0; + + if (vsi->base_vector) { + dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n", + vsi->vsi_num, vsi->base_vector); + return -EEXIST; + } + + if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + return -ENOENT; + + switch (vsi->type) { + case ICE_VSI_PF: + num_q_vectors = vsi->num_q_vectors; + break; + default: + dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n", + vsi->type); + break; + } + + if (num_q_vectors) + vsi->base_vector = ice_get_res(pf, pf->irq_tracker, + num_q_vectors, vsi->idx); + + if (vsi->base_vector < 0) { + dev_err(&pf->pdev->dev, + "Failed to get tracking for %d vectors for VSI %d, err=%d\n", + num_q_vectors, vsi->vsi_num, vsi->base_vector); + return -ENOENT; + } + + return 0; +} + +/** + * ice_fill_rss_lut - Fill the RSS lookup table with default values + * @lut: Lookup table + * @rss_table_size: Lookup table size + * @rss_size: Range of queue number for hashing + */ +void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) +{ + u16 i; + + for (i = 0; i < rss_table_size; i++) + lut[i] = i % rss_size; +} + +/** + * ice_vsi_cfg_rss - Configure RSS params for a VSI + * @vsi: VSI to be configured + */ +static int ice_vsi_cfg_rss(struct ice_vsi *vsi) +{ + u8 seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE]; + struct ice_aqc_get_set_rss_keys *key; + struct ice_pf *pf = vsi->back; + enum ice_status status; + int err = 0; + u8 *lut; + + vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq); + + lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); + else + ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); + + status = ice_aq_set_rss_lut(&pf->hw, vsi->vsi_num, vsi->rss_lut_type, + lut, vsi->rss_table_size); + + if (status) { + dev_err(&vsi->back->pdev->dev, + "set_rss_lut failed, error %d\n", status); + err = -EIO; + goto ice_vsi_cfg_rss_exit; + } + + key = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*key), GFP_KERNEL); + if (!key) { + err = -ENOMEM; + goto ice_vsi_cfg_rss_exit; + } + + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, + ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); + else + netdev_rss_key_fill((void *)seed, + ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); + memcpy(&key->standard_rss_key, seed, + ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); + + status = ice_aq_set_rss_key(&pf->hw, vsi->vsi_num, key); + + if (status) { + dev_err(&vsi->back->pdev->dev, "set_rss_key failed, error %d\n", + status); + err = -EIO; + } + + devm_kfree(&pf->pdev->dev, key); +ice_vsi_cfg_rss_exit: + devm_kfree(&pf->pdev->dev, lut); + return err; +} + +/** + * ice_vsi_reinit_setup - return resource and reallocate resource for a VSI + * @vsi: pointer to the ice_vsi + * + * This reallocates the VSIs queue resources + * + * Returns 0 on success and negative value on failure + */ +static int ice_vsi_reinit_setup(struct ice_vsi *vsi) +{ + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + int ret, i; + + if (!vsi) + return -EINVAL; + + ice_vsi_free_q_vectors(vsi); + ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx); + vsi->base_vector = 0; + ice_vsi_clear_rings(vsi); + ice_vsi_free_arrays(vsi, false); + ice_vsi_set_num_qs(vsi); + + /* Initialize VSI struct elements and create VSI in FW */ + ret = ice_vsi_add(vsi); + if (ret < 0) + goto err_vsi; + + ret 
= ice_vsi_alloc_arrays(vsi, false); + if (ret < 0) + goto err_vsi; + + switch (vsi->type) { + case ICE_VSI_PF: + if (!vsi->netdev) { + ret = ice_cfg_netdev(vsi); + if (ret) + goto err_rings; + + ret = register_netdev(vsi->netdev); + if (ret) + goto err_rings; + + netif_carrier_off(vsi->netdev); + netif_tx_stop_all_queues(vsi->netdev); + } + + ret = ice_vsi_alloc_q_vectors(vsi); + if (ret) + goto err_rings; + + ret = ice_vsi_setup_vector_base(vsi); + if (ret) + goto err_vectors; + + ret = ice_vsi_alloc_rings(vsi); + if (ret) + goto err_vectors; + + ice_vsi_map_rings_to_vectors(vsi); + break; + default: + break; + } + + ice_vsi_set_tc_cfg(vsi); + + /* configure VSI nodes based on number of queues and TC's */ + for (i = 0; i < vsi->tc_cfg.numtc; i++) + max_txqs[i] = vsi->num_txq; + + ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num, + vsi->tc_cfg.ena_tc, max_txqs); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed VSI lan queue config\n"); + goto err_vectors; + } + return 0; + +err_vectors: + ice_vsi_free_q_vectors(vsi); +err_rings: + if (vsi->netdev) { + vsi->current_netdev_flags = 0; + unregister_netdev(vsi->netdev); + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } +err_vsi: + ice_vsi_clear(vsi); + set_bit(__ICE_RESET_FAILED, vsi->back->state); + return ret; +} + +/** + * ice_vsi_setup - Set up a VSI by a given type + * @pf: board private structure + * @type: VSI type + * @pi: pointer to the port_info instance + * + * This allocates the sw VSI structure and its queue resources. + * + * Returns pointer to the successfully allocated and configure VSI sw struct on + * success, otherwise returns NULL on failure. + */ +static struct ice_vsi * +ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type, + struct ice_port_info *pi) +{ + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct device *dev = &pf->pdev->dev; + struct ice_vsi_ctx ctxt = { 0 }; + struct ice_vsi *vsi; + int ret, i; + + vsi = ice_vsi_alloc(pf, type); + if (!vsi) { + dev_err(dev, "could not allocate VSI\n"); + return NULL; + } + + vsi->port_info = pi; + vsi->vsw = pf->first_sw; + + if (ice_vsi_get_qs(vsi)) { + dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n", + vsi->idx); + goto err_get_qs; + } + + /* set RSS capabilities */ + ice_vsi_set_rss_params(vsi); + + /* create the VSI */ + ret = ice_vsi_add(vsi); + if (ret) + goto err_vsi; + + ctxt.vsi_num = vsi->vsi_num; + + switch (vsi->type) { + case ICE_VSI_PF: + ret = ice_cfg_netdev(vsi); + if (ret) + goto err_cfg_netdev; + + ret = register_netdev(vsi->netdev); + if (ret) + goto err_register_netdev; + + netif_carrier_off(vsi->netdev); + + /* make sure transmit queues start off as stopped */ + netif_tx_stop_all_queues(vsi->netdev); + ret = ice_vsi_alloc_q_vectors(vsi); + if (ret) + goto err_msix; + + ret = ice_vsi_setup_vector_base(vsi); + if (ret) + goto err_rings; + + ret = ice_vsi_alloc_rings(vsi); + if (ret) + goto err_rings; + + ice_vsi_map_rings_to_vectors(vsi); + + /* Do not exit if configuring RSS had an issue, at least + * receive traffic on first queue. 
Hence no need to capture + * return value + */ + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + ice_vsi_cfg_rss(vsi); + break; + default: + /* if vsi type is not recognized, clean up the resources and + * exit + */ + goto err_rings; + } + + ice_vsi_set_tc_cfg(vsi); + + /* configure VSI nodes based on number of queues and TC's */ + for (i = 0; i < vsi->tc_cfg.numtc; i++) + max_txqs[i] = vsi->num_txq; + + ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num, + vsi->tc_cfg.ena_tc, max_txqs); + if (ret) { + dev_info(&pf->pdev->dev, "Failed VSI lan queue config\n"); + goto err_rings; + } + + return vsi; + +err_rings: + ice_vsi_free_q_vectors(vsi); +err_msix: + if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED) + unregister_netdev(vsi->netdev); +err_register_netdev: + if (vsi->netdev) { + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } +err_cfg_netdev: + ret = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL); + if (ret) + dev_err(&vsi->back->pdev->dev, + "Free VSI AQ call failed, err %d\n", ret); +err_vsi: + ice_vsi_put_qs(vsi); +err_get_qs: + pf->q_left_tx += vsi->alloc_txq; + pf->q_left_rx += vsi->alloc_rxq; + ice_vsi_clear(vsi); + + return NULL; +} + +/** + * ice_vsi_add_vlan - Add vsi membership for given vlan + * @vsi: the vsi being configured + * @vid: vlan id to be added + */ +static int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid) +{ + struct ice_fltr_list_entry *tmp; + struct ice_pf *pf = vsi->back; + LIST_HEAD(tmp_add_list); + enum ice_status status; + int err = 0; + + tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + tmp->fltr_info.lkup_type = ICE_SW_LKUP_VLAN; + tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; + tmp->fltr_info.flag = ICE_FLTR_TX; + tmp->fltr_info.src = vsi->vsi_num; + tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num; + tmp->fltr_info.l_data.vlan.vlan_id = vid; + + INIT_LIST_HEAD(&tmp->list_entry); + list_add(&tmp->list_entry, &tmp_add_list); + + status = ice_add_vlan(&pf->hw, &tmp_add_list); + if (status) { + err = -ENODEV; + dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n", + vid, vsi->vsi_num); + } + + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + return err; +} + +/** + * ice_vlan_rx_add_vid - Add a vlan id filter to HW offload + * @netdev: network interface to be adjusted + * @proto: unused protocol + * @vid: vlan id to be added + * + * net_device_ops implementation for adding vlan ids + */ +static int ice_vlan_rx_add_vid(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + int ret = 0; + + if (vid >= VLAN_N_VID) { + netdev_err(netdev, "VLAN id requested %d is out of range %d\n", + vid, VLAN_N_VID); + return -EINVAL; + } + + if (vsi->info.pvid) + return -EINVAL; + + /* Add all VLAN ids including 0 to the switch filter. 
VLAN id 0 is + * needed to continue allowing all untagged packets since VLAN prune + * list is applied to all packets by the switch + */ + ret = ice_vsi_add_vlan(vsi, vid); + + if (!ret) + set_bit(vid, vsi->active_vlans); + + return ret; +} + +/** + * ice_vsi_kill_vlan - Remove VSI membership for a given VLAN + * @vsi: the VSI being configured + * @vid: VLAN id to be removed + */ +static void ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid) +{ + struct ice_fltr_list_entry *list; + struct ice_pf *pf = vsi->back; + LIST_HEAD(tmp_add_list); + + list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); + if (!list) + return; + + list->fltr_info.lkup_type = ICE_SW_LKUP_VLAN; + list->fltr_info.fwd_id.vsi_id = vsi->vsi_num; + list->fltr_info.fltr_act = ICE_FWD_TO_VSI; + list->fltr_info.l_data.vlan.vlan_id = vid; + list->fltr_info.flag = ICE_FLTR_TX; + list->fltr_info.src = vsi->vsi_num; + + INIT_LIST_HEAD(&list->list_entry); + list_add(&list->list_entry, &tmp_add_list); + + if (ice_remove_vlan(&pf->hw, &tmp_add_list)) + dev_err(&pf->pdev->dev, "Error removing VLAN %d on vsi %i\n", + vid, vsi->vsi_num); + + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); +} + +/** + * ice_vlan_rx_kill_vid - Remove a vlan id filter from HW offload + * @netdev: network interface to be adjusted + * @proto: unused protocol + * @vid: vlan id to be removed + * + * net_device_ops implementation for removing vlan ids + */ +static int ice_vlan_rx_kill_vid(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + if (vsi->info.pvid) + return -EINVAL; + + /* return code is ignored as there is nothing a user + * can do about failure to remove and a log message was + * already printed from the other function + */ + ice_vsi_kill_vlan(vsi, vid); + + clear_bit(vid, vsi->active_vlans); + + return 0; +} + +/** + * ice_setup_pf_sw - Setup the HW switch on startup or after reset + * @pf: board private structure + * + * Returns 0 on success, negative value on failure + */ +static int ice_setup_pf_sw(struct ice_pf *pf) +{ + LIST_HEAD(tmp_add_list); + u8 broadcast[ETH_ALEN]; + struct ice_vsi *vsi; + int status = 0; + + if (!ice_is_reset_recovery_pending(pf->state)) { + vsi = ice_vsi_setup(pf, ICE_VSI_PF, pf->hw.port_info); + if (!vsi) { + status = -ENOMEM; + goto error_exit; + } + } else { + vsi = pf->vsi[0]; + status = ice_vsi_reinit_setup(vsi); + if (status < 0) + return -EIO; + } + + /* tmp_add_list contains a list of MAC addresses for which MAC + * filters need to be programmed. Add the VSI's unicast MAC to + * this list + */ + status = ice_add_mac_to_list(vsi, &tmp_add_list, + vsi->port_info->mac.perm_addr); + if (status) + goto error_exit; + + /* VSI needs to receive broadcast traffic, so add the broadcast + * MAC address to the list. 
+ */ + eth_broadcast_addr(broadcast); + status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast); + if (status) + goto error_exit; + + /* program MAC filters for entries in tmp_add_list */ + status = ice_add_mac(&pf->hw, &tmp_add_list); + if (status) { + dev_err(&pf->pdev->dev, "Could not add MAC filters\n"); + status = -ENOMEM; + goto error_exit; + } + + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + return status; + +error_exit: + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + + if (vsi) { + ice_vsi_free_q_vectors(vsi); + if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED) + unregister_netdev(vsi->netdev); + if (vsi->netdev) { + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } + + ice_vsi_delete(vsi); + ice_vsi_put_qs(vsi); + pf->q_left_tx += vsi->alloc_txq; + pf->q_left_rx += vsi->alloc_rxq; + ice_vsi_clear(vsi); + } + return status; +} + +/** + * ice_determine_q_usage - Calculate queue distribution + * @pf: board private structure + * + * Return -ENOMEM if we don't get enough queues for all ports + */ +static void ice_determine_q_usage(struct ice_pf *pf) +{ + u16 q_left_tx, q_left_rx; + + q_left_tx = pf->hw.func_caps.common_cap.num_txq; + q_left_rx = pf->hw.func_caps.common_cap.num_rxq; + + pf->num_lan_tx = min_t(int, q_left_tx, num_online_cpus()); + + /* only 1 rx queue unless RSS is enabled */ + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + pf->num_lan_rx = 1; + else + pf->num_lan_rx = min_t(int, q_left_rx, num_online_cpus()); + + pf->q_left_tx = q_left_tx - pf->num_lan_tx; + pf->q_left_rx = q_left_rx - pf->num_lan_rx; +} + +/** + * ice_deinit_pf - Unrolls initialziations done by ice_init_pf + * @pf: board private structure to initialize + */ +static void ice_deinit_pf(struct ice_pf *pf) +{ + if (pf->serv_tmr.function) + del_timer_sync(&pf->serv_tmr); + if (pf->serv_task.func) + cancel_work_sync(&pf->serv_task); + mutex_destroy(&pf->sw_mutex); + mutex_destroy(&pf->avail_q_mutex); +} + +/** + * ice_init_pf - Initialize general software structures (struct ice_pf) + * @pf: board private structure to initialize + */ +static void ice_init_pf(struct ice_pf *pf) +{ + bitmap_zero(pf->flags, ICE_PF_FLAGS_NBITS); + set_bit(ICE_FLAG_MSIX_ENA, pf->flags); + + mutex_init(&pf->sw_mutex); + mutex_init(&pf->avail_q_mutex); + + /* Clear avail_[t|r]x_qs bitmaps (set all to avail) */ + mutex_lock(&pf->avail_q_mutex); + bitmap_zero(pf->avail_txqs, ICE_MAX_TXQS); + bitmap_zero(pf->avail_rxqs, ICE_MAX_RXQS); + mutex_unlock(&pf->avail_q_mutex); + + if (pf->hw.func_caps.common_cap.rss_table_size) + set_bit(ICE_FLAG_RSS_ENA, pf->flags); + + /* setup service timer and periodic service task */ + timer_setup(&pf->serv_tmr, ice_service_timer, 0); + pf->serv_tmr_period = HZ; + INIT_WORK(&pf->serv_task, ice_service_task); + clear_bit(__ICE_SERVICE_SCHED, pf->state); +} + +/** + * ice_ena_msix_range - Request a range of MSIX vectors from the OS + * @pf: board private structure + * + * compute the number of MSIX vectors required (v_budget) and request from + * the OS. 
Return the number of vectors reserved or negative on failure + */ +static int ice_ena_msix_range(struct ice_pf *pf) +{ + int v_left, v_actual, v_budget = 0; + int needed, err, i; + + v_left = pf->hw.func_caps.common_cap.num_msix_vectors; + + /* reserve one vector for miscellaneous handler */ + needed = 1; + v_budget += needed; + v_left -= needed; + + /* reserve vectors for LAN traffic */ + pf->num_lan_msix = min_t(int, num_online_cpus(), v_left); + v_budget += pf->num_lan_msix; + + pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget, + sizeof(struct msix_entry), GFP_KERNEL); + + if (!pf->msix_entries) { + err = -ENOMEM; + goto exit_err; + } + + for (i = 0; i < v_budget; i++) + pf->msix_entries[i].entry = i; + + /* actually reserve the vectors */ + v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries, + ICE_MIN_MSIX, v_budget); + + if (v_actual < 0) { + dev_err(&pf->pdev->dev, "unable to reserve MSI-X vectors\n"); + err = v_actual; + goto msix_err; + } + + if (v_actual < v_budget) { + dev_warn(&pf->pdev->dev, + "not enough vectors. requested = %d, obtained = %d\n", + v_budget, v_actual); + if (v_actual >= (pf->num_lan_msix + 1)) { + pf->num_avail_msix = v_actual - (pf->num_lan_msix + 1); + } else if (v_actual >= 2) { + pf->num_lan_msix = 1; + pf->num_avail_msix = v_actual - 2; + } else { + pci_disable_msix(pf->pdev); + err = -ERANGE; + goto msix_err; + } + } + + return v_actual; + +msix_err: + devm_kfree(&pf->pdev->dev, pf->msix_entries); + goto exit_err; + +exit_err: + pf->num_lan_msix = 0; + clear_bit(ICE_FLAG_MSIX_ENA, pf->flags); + return err; +} + +/** + * ice_dis_msix - Disable MSI-X interrupt setup in OS + * @pf: board private structure + */ +static void ice_dis_msix(struct ice_pf *pf) +{ + pci_disable_msix(pf->pdev); + devm_kfree(&pf->pdev->dev, pf->msix_entries); + pf->msix_entries = NULL; + clear_bit(ICE_FLAG_MSIX_ENA, pf->flags); +} + +/** + * ice_init_interrupt_scheme - Determine proper interrupt scheme + * @pf: board private structure to initialize + */ +static int ice_init_interrupt_scheme(struct ice_pf *pf) +{ + int vectors = 0; + ssize_t size; + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + vectors = ice_ena_msix_range(pf); + else + return -ENODEV; + + if (vectors < 0) + return vectors; + + /* set up vector assignment tracking */ + size = sizeof(struct ice_res_tracker) + (sizeof(u16) * vectors); + + pf->irq_tracker = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL); + if (!pf->irq_tracker) { + ice_dis_msix(pf); + return -ENOMEM; + } + + pf->irq_tracker->num_entries = vectors; + + return 0; +} + +/** + * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme + * @pf: board private structure + */ +static void ice_clear_interrupt_scheme(struct ice_pf *pf) +{ + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + ice_dis_msix(pf); + + if (pf->irq_tracker) { + devm_kfree(&pf->pdev->dev, pf->irq_tracker); + pf->irq_tracker = NULL; + } +} + +/** + * ice_probe - Device initialization routine + * @pdev: PCI device information struct + * @ent: entry in ice_pci_tbl + * + * Returns 0 on success, negative on failure + */ +static int ice_probe(struct pci_dev *pdev, + const struct pci_device_id __always_unused *ent) +{ + struct ice_pf *pf; + struct ice_hw *hw; + int err; + + /* this driver uses devres, see Documentation/driver-model/devres.txt */ + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev)); + if (err) { + dev_err(&pdev->dev, "BAR0 I/O map error %d\n", err); + return err; + } + + pf = 
devm_kzalloc(&pdev->dev, sizeof(*pf), GFP_KERNEL); + if (!pf) + return -ENOMEM; + + /* set up for high or low dma */ + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err); + return err; + } + + pci_enable_pcie_error_reporting(pdev); + pci_set_master(pdev); + + pf->pdev = pdev; + pci_set_drvdata(pdev, pf); + set_bit(__ICE_DOWN, pf->state); + + hw = &pf->hw; + hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0]; + hw->back = pf; + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + hw->bus.device = PCI_SLOT(pdev->devfn); + hw->bus.func = PCI_FUNC(pdev->devfn); + ice_set_ctrlq_len(hw); + + pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M); + +#ifndef CONFIG_DYNAMIC_DEBUG + if (debug < -1) + hw->debug_mask = debug; +#endif + + err = ice_init_hw(hw); + if (err) { + dev_err(&pdev->dev, "ice_init_hw failed: %d\n", err); + err = -EIO; + goto err_exit_unroll; + } + + dev_info(&pdev->dev, "firmware %d.%d.%05d api %d.%d\n", + hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build, + hw->api_maj_ver, hw->api_min_ver); + + ice_init_pf(pf); + + ice_determine_q_usage(pf); + + pf->num_alloc_vsi = min_t(u16, ICE_MAX_VSI_ALLOC, + hw->func_caps.guaranteed_num_vsi); + if (!pf->num_alloc_vsi) { + err = -EIO; + goto err_init_pf_unroll; + } + + pf->vsi = devm_kcalloc(&pdev->dev, pf->num_alloc_vsi, + sizeof(struct ice_vsi *), GFP_KERNEL); + if (!pf->vsi) { + err = -ENOMEM; + goto err_init_pf_unroll; + } + + err = ice_init_interrupt_scheme(pf); + if (err) { + dev_err(&pdev->dev, + "ice_init_interrupt_scheme failed: %d\n", err); + err = -EIO; + goto err_init_interrupt_unroll; + } + + /* In case of MSIX we are going to setup the misc vector right here + * to handle admin queue events etc. In case of legacy and MSI + * the misc functionality and queue processing is combined in + * the same vector and that gets setup at open. 
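+ * (ICE_FLAG_MSIX_ENA is set in ice_init_pf() and is cleared again by
+ * ice_ena_msix_range() if the MSI-X vector reservation fails.)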
+ */ + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + err = ice_req_irq_msix_misc(pf); + if (err) { + dev_err(&pdev->dev, + "setup of misc vector failed: %d\n", err); + goto err_init_interrupt_unroll; + } + } + + /* create switch struct for the switch element created by FW on boot */ + pf->first_sw = devm_kzalloc(&pdev->dev, sizeof(struct ice_sw), + GFP_KERNEL); + if (!pf->first_sw) { + err = -ENOMEM; + goto err_msix_misc_unroll; + } + + pf->first_sw->bridge_mode = BRIDGE_MODE_VEB; + pf->first_sw->pf = pf; + + /* record the sw_id available for later use */ + pf->first_sw->sw_id = hw->port_info->sw_id; + + err = ice_setup_pf_sw(pf); + if (err) { + dev_err(&pdev->dev, + "probe failed due to setup pf switch:%d\n", err); + goto err_alloc_sw_unroll; + } + + /* Driver is mostly up */ + clear_bit(__ICE_DOWN, pf->state); + + /* since everything is good, start the service timer */ + mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); + + err = ice_init_link_events(pf->hw.port_info); + if (err) { + dev_err(&pdev->dev, "ice_init_link_events failed: %d\n", err); + goto err_alloc_sw_unroll; + } + + return 0; + +err_alloc_sw_unroll: + set_bit(__ICE_DOWN, pf->state); + devm_kfree(&pf->pdev->dev, pf->first_sw); +err_msix_misc_unroll: + ice_free_irq_msix_misc(pf); +err_init_interrupt_unroll: + ice_clear_interrupt_scheme(pf); + devm_kfree(&pdev->dev, pf->vsi); +err_init_pf_unroll: + ice_deinit_pf(pf); + ice_deinit_hw(hw); +err_exit_unroll: + pci_disable_pcie_error_reporting(pdev); + return err; +} + +/** + * ice_remove - Device removal routine + * @pdev: PCI device information struct + */ +static void ice_remove(struct pci_dev *pdev) +{ + struct ice_pf *pf = pci_get_drvdata(pdev); + int i = 0; + int err; + + if (!pf) + return; + + set_bit(__ICE_DOWN, pf->state); + + for (i = 0; i < pf->num_alloc_vsi; i++) { + if (!pf->vsi[i]) + continue; + + err = ice_vsi_release(pf->vsi[i]); + if (err) + dev_dbg(&pf->pdev->dev, "Failed to release VSI index %d (err %d)\n", + i, err); + } + + ice_free_irq_msix_misc(pf); + ice_clear_interrupt_scheme(pf); + ice_deinit_pf(pf); + ice_deinit_hw(&pf->hw); + pci_disable_pcie_error_reporting(pdev); +} + +/* ice_pci_tbl - PCI Device ID Table + * + * Wildcard entries (PCI_ANY_ID) should come last + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, + * Class, Class Mask, private data (not used) } + */ +static const struct pci_device_id ice_pci_tbl[] = { + { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_QSFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_10G_BASE_T), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SGMII), 0 }, + /* required last entry */ + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ice_pci_tbl); + +static struct pci_driver ice_driver = { + .name = KBUILD_MODNAME, + .id_table = ice_pci_tbl, + .probe = ice_probe, + .remove = ice_remove, +}; + +/** + * ice_module_init - Driver registration routine + * + * ice_module_init is the first routine called when the driver is + * loaded. All it does is register with the PCI subsystem. 
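+ * In addition, it creates the driver's ordered workqueue (ice_wq) and
+ * destroys it again if registering the PCI driver fails.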
+ */ +static int __init ice_module_init(void) +{ + int status; + + pr_info("%s - version %s\n", ice_driver_string, ice_drv_ver); + pr_info("%s\n", ice_copyright); + + ice_wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, KBUILD_MODNAME); + if (!ice_wq) { + pr_err("Failed to create workqueue\n"); + return -ENOMEM; + } + + status = pci_register_driver(&ice_driver); + if (status) { + pr_err("failed to register pci driver, err %d\n", status); + destroy_workqueue(ice_wq); + } + + return status; +} +module_init(ice_module_init); + +/** + * ice_module_exit - Driver exit cleanup routine + * + * ice_module_exit is called just before the driver is removed + * from memory. + */ +static void __exit ice_module_exit(void) +{ + pci_unregister_driver(&ice_driver); + destroy_workqueue(ice_wq); + pr_info("module unloaded\n"); +} +module_exit(ice_module_exit); + +/** + * ice_set_mac_address - NDO callback to set mac address + * @netdev: network interface device structure + * @pi: pointer to an address structure + * + * Returns 0 on success, negative on failure + */ +static int ice_set_mac_address(struct net_device *netdev, void *pi) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + struct sockaddr *addr = pi; + enum ice_status status; + LIST_HEAD(a_mac_list); + LIST_HEAD(r_mac_list); + u8 flags = 0; + int err; + u8 *mac; + + mac = (u8 *)addr->sa_data; + + if (!is_valid_ether_addr(mac)) + return -EADDRNOTAVAIL; + + if (ether_addr_equal(netdev->dev_addr, mac)) { + netdev_warn(netdev, "already using mac %pM\n", mac); + return 0; + } + + if (test_bit(__ICE_DOWN, pf->state) || + ice_is_reset_recovery_pending(pf->state)) { + netdev_err(netdev, "can't set mac %pM. device not ready\n", + mac); + return -EBUSY; + } + + /* When we change the mac address we also have to change the mac address + * based filter rules that were created previously for the old mac + * address. So first, we remove the old filter rule using ice_remove_mac + * and then create a new filter rule using ice_add_mac. Note that for + * both these operations, we first need to form a "list" of mac + * addresses (even though in this case, we have only 1 mac address to be + * added/removed) and this done using ice_add_mac_to_list. Depending on + * the ensuing operation this "list" of mac addresses is either to be + * added or removed from the filter. + */ + err = ice_add_mac_to_list(vsi, &r_mac_list, netdev->dev_addr); + if (err) { + err = -EADDRNOTAVAIL; + goto free_lists; + } + + status = ice_remove_mac(hw, &r_mac_list); + if (status) { + err = -EADDRNOTAVAIL; + goto free_lists; + } + + err = ice_add_mac_to_list(vsi, &a_mac_list, mac); + if (err) { + err = -EADDRNOTAVAIL; + goto free_lists; + } + + status = ice_add_mac(hw, &a_mac_list); + if (status) { + err = -EADDRNOTAVAIL; + goto free_lists; + } + +free_lists: + /* free list entries */ + ice_free_fltr_list(&pf->pdev->dev, &r_mac_list); + ice_free_fltr_list(&pf->pdev->dev, &a_mac_list); + + if (err) { + netdev_err(netdev, "can't set mac %pM. filter update failed\n", + mac); + return err; + } + + /* change the netdev's mac address */ + memcpy(netdev->dev_addr, mac, netdev->addr_len); + netdev_dbg(vsi->netdev, "updated mac address to %pM\n", + netdev->dev_addr); + + /* write new mac address to the firmware */ + flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL; + status = ice_aq_manage_mac_write(hw, mac, flags, NULL); + if (status) { + netdev_err(netdev, "can't set mac %pM. 
write to firmware failed.\n", + mac); + } + return 0; +} + +/** + * ice_set_rx_mode - NDO callback to set the netdev filters + * @netdev: network interface device structure + */ +static void ice_set_rx_mode(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + if (!vsi) + return; + + /* Set the flags to synchronize filters + * ndo_set_rx_mode may be triggered even without a change in netdev + * flags + */ + set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags); + + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + ice_service_task_schedule(vsi->back); +} + +/** + * ice_fdb_add - add an entry to the hardware database + * @ndm: the input from the stack + * @tb: pointer to array of nladdr (unused) + * @dev: the net device pointer + * @addr: the MAC address entry being added + * @vid: VLAN id + * @flags: instructions from stack about fdb operation + */ +static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid, u16 flags) +{ + int err; + + if (vid) { + netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n"); + return -EINVAL; + } + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + netdev_err(dev, "FDB only supports static addresses\n"); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_add_excl(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_add_excl(dev, addr); + else + err = -EINVAL; + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} + +/** + * ice_fdb_del - delete an entry from the hardware database + * @ndm: the input from the stack + * @tb: pointer to array of nladdr (unused) + * @dev: the net device pointer + * @addr: the MAC address entry being added + * @vid: VLAN id + */ +static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + __always_unused u16 vid) +{ + int err; + + if (ndm->ndm_state & NUD_PERMANENT) { + netdev_err(dev, "FDB only supports static addresses\n"); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr)) + err = dev_uc_del(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(dev, addr); + else + err = -EINVAL; + + return err; +} + +/** + * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx + * @vsi: the vsi being changed + */ +static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) +{ + struct device *dev = &vsi->back->pdev->dev; + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx ctxt = { 0 }; + enum ice_status status; + + /* Here we are configuring the VSI to let the driver add VLAN tags by + * setting vlan_flags to ICE_AQ_VSI_VLAN_MODE_ALL. The actual VLAN tag + * insertion happens in the Tx hot path, in ice_tx_map. 
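+ * Only the VLAN section of the context is flagged as valid
+ * (ICE_AQ_VSI_PROP_VLAN_VALID), so the ice_aq_update_vsi() call below
+ * is expected to leave the remaining VSI properties untouched.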
+ */ + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL; + + ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); + ctxt.vsi_num = vsi->vsi_num; + + status = ice_aq_update_vsi(hw, &ctxt, NULL); + if (status) { + dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + return -EIO; + } + + vsi->info.vlan_flags = ctxt.info.vlan_flags; + return 0; +} + +/** + * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx + * @vsi: the vsi being changed + * @ena: boolean value indicating if this is a enable or disable request + */ +static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) +{ + struct device *dev = &vsi->back->pdev->dev; + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx ctxt = { 0 }; + enum ice_status status; + + /* Here we are configuring what the VSI should do with the VLAN tag in + * the Rx packet. We can either leave the tag in the packet or put it in + * the Rx descriptor. + */ + if (ena) { + /* Strip VLAN tag from Rx packet and put it in the desc */ + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_STR_BOTH; + } else { + /* Disable stripping. Leave tag in packet */ + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING; + } + + /* Allow all packets untagged/tagged */ + ctxt.info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL; + + ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); + ctxt.vsi_num = vsi->vsi_num; + + status = ice_aq_update_vsi(hw, &ctxt, NULL); + if (status) { + dev_err(dev, "update VSI for VALN strip failed, ena = %d err %d aq_err %d\n", + ena, status, hw->adminq.sq_last_status); + return -EIO; + } + + vsi->info.vlan_flags = ctxt.info.vlan_flags; + return 0; +} + +/** + * ice_set_features - set the netdev feature flags + * @netdev: ptr to the netdev being adjusted + * @features: the feature set that the stack is suggesting + */ +static int ice_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + int ret = 0; + + if ((features & NETIF_F_HW_VLAN_CTAG_RX) && + !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) + ret = ice_vsi_manage_vlan_stripping(vsi, true); + else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) && + (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) + ret = ice_vsi_manage_vlan_stripping(vsi, false); + else if ((features & NETIF_F_HW_VLAN_CTAG_TX) && + !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) + ret = ice_vsi_manage_vlan_insertion(vsi); + else if (!(features & NETIF_F_HW_VLAN_CTAG_TX) && + (netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) + ret = ice_vsi_manage_vlan_insertion(vsi); + + return ret; +} + +/** + * ice_vsi_vlan_setup - Setup vlan offload properties on a VSI + * @vsi: VSI to setup vlan properties for + */ +static int ice_vsi_vlan_setup(struct ice_vsi *vsi) +{ + int ret = 0; + + if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + ret = ice_vsi_manage_vlan_stripping(vsi, true); + if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_TX) + ret = ice_vsi_manage_vlan_insertion(vsi); + + return ret; +} + +/** + * ice_restore_vlan - Reinstate VLANs when vsi/netdev comes back up + * @vsi: the VSI being brought back up + */ +static int ice_restore_vlan(struct ice_vsi *vsi) +{ + int err; + u16 vid; + + if (!vsi->netdev) + return -EINVAL; + + err = ice_vsi_vlan_setup(vsi); + if (err) + return err; + + for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) { + err = ice_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q), vid); + if (err) + break; + } + + 
return err; +} + +/** + * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance + * @ring: The Tx ring to configure + * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized + * @pf_q: queue index in the PF space + * + * Configure the Tx descriptor ring in TLAN context. + */ +static void +ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) +{ + struct ice_vsi *vsi = ring->vsi; + struct ice_hw *hw = &vsi->back->hw; + + tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S; + + tlan_ctx->port_num = vsi->port_info->lport; + + /* Transmit Queue Length */ + tlan_ctx->qlen = ring->count; + + /* PF number */ + tlan_ctx->pf_num = hw->pf_id; + + /* queue belongs to a specific VSI type + * VF / VM index should be programmed per vmvf_type setting: + * for vmvf_type = VF, it is VF number between 0-256 + * for vmvf_type = VM, it is VM number between 0-767 + * for PF or EMP this field should be set to zero + */ + switch (vsi->type) { + case ICE_VSI_PF: + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; + break; + default: + return; + } + + /* make sure the context is associated with the right VSI */ + tlan_ctx->src_vsi = vsi->vsi_num; + + tlan_ctx->tso_ena = ICE_TX_LEGACY; + tlan_ctx->tso_qnum = pf_q; + + /* Legacy or Advanced Host Interface: + * 0: Advanced Host Interface + * 1: Legacy Host Interface + */ + tlan_ctx->legacy_int = ICE_TX_LEGACY; +} + +/** + * ice_vsi_cfg_txqs - Configure the VSI for Tx + * @vsi: the VSI being configured + * + * Return 0 on success and a negative value on error + * Configure the Tx VSI for operation. + */ +static int ice_vsi_cfg_txqs(struct ice_vsi *vsi) +{ + struct ice_aqc_add_tx_qgrp *qg_buf; + struct ice_aqc_add_txqs_perq *txq; + struct ice_pf *pf = vsi->back; + enum ice_status status; + u16 buf_len, i, pf_q; + int err = 0, tc = 0; + u8 num_q_grps; + + buf_len = sizeof(struct ice_aqc_add_tx_qgrp); + qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL); + if (!qg_buf) + return -ENOMEM; + + if (vsi->num_txq > ICE_MAX_TXQ_PER_TXQG) { + err = -EINVAL; + goto err_cfg_txqs; + } + qg_buf->num_txqs = 1; + num_q_grps = 1; + + /* set up and configure the tx queues */ + ice_for_each_txq(vsi, i) { + struct ice_tlan_ctx tlan_ctx = { 0 }; + + pf_q = vsi->txq_map[i]; + ice_setup_tx_ctx(vsi->tx_rings[i], &tlan_ctx, pf_q); + /* copy context contents into the qg_buf */ + qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); + ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, + ice_tlan_ctx_info); + + /* init queue specific tail reg. It is referred as transmit + * comm scheduler queue doorbell. + */ + vsi->tx_rings[i]->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q); + status = ice_ena_vsi_txq(vsi->port_info, vsi->vsi_num, tc, + num_q_grps, qg_buf, buf_len, NULL); + if (status) { + dev_err(&vsi->back->pdev->dev, + "Failed to set LAN Tx queue context, error: %d\n", + status); + err = -ENODEV; + goto err_cfg_txqs; + } + + /* Add Tx Queue TEID into the VSI tx ring from the response + * This will complete configuring and enabling the queue. + */ + txq = &qg_buf->txqs[0]; + if (pf_q == le16_to_cpu(txq->txq_id)) + vsi->tx_rings[i]->txq_teid = + le32_to_cpu(txq->q_teid); + } +err_cfg_txqs: + devm_kfree(&pf->pdev->dev, qg_buf); + return err; +} + +/** + * ice_setup_rx_ctx - Configure a receive ring context + * @ring: The Rx ring to configure + * + * Configure the Rx descriptor ring in RLAN context. 
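+ * Besides writing the RLAN queue context, this programs the flexible
+ * descriptor profile via QRXFLXP_CNTXT, initializes the queue's tail
+ * register and pre-allocates Rx buffers for the ring.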
+ */ +static int ice_setup_rx_ctx(struct ice_ring *ring) +{ + struct ice_vsi *vsi = ring->vsi; + struct ice_hw *hw = &vsi->back->hw; + u32 rxdid = ICE_RXDID_FLEX_NIC; + struct ice_rlan_ctx rlan_ctx; + u32 regval; + u16 pf_q; + int err; + + /* what is RX queue number in global space of 2K rx queues */ + pf_q = vsi->rxq_map[ring->q_index]; + + /* clear the context structure first */ + memset(&rlan_ctx, 0, sizeof(rlan_ctx)); + + rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S; + + rlan_ctx.qlen = ring->count; + + /* Receive Packet Data Buffer Size. + * The Packet Data Buffer Size is defined in 128 byte units. + */ + rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; + + /* use 32 byte descriptors */ + rlan_ctx.dsize = 1; + + /* Strip the Ethernet CRC bytes before the packet is posted to host + * memory. + */ + rlan_ctx.crcstrip = 1; + + /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */ + rlan_ctx.l2tsel = 1; + + rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; + rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; + rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; + + /* This controls whether VLAN is stripped from inner headers + * The VLAN in the inner L2 header is stripped to the receive + * descriptor if enabled by this flag. + */ + rlan_ctx.showiv = 0; + + /* Max packet size for this queue - must not be set to a larger value + * than 5 x DBUF + */ + rlan_ctx.rxmax = min_t(u16, vsi->max_frame, + ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len); + + /* Rx queue threshold in units of 64 */ + rlan_ctx.lrxqthresh = 1; + + /* Enable Flexible Descriptors in the queue context which + * allows this driver to select a specific receive descriptor format + */ + regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); + regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) & + QRXFLXP_CNTXT_RXDID_IDX_M; + + /* increasing context priority to pick up profile id; + * default is 0x01; setting to 0x03 to ensure profile + * is programming if prev context is of same priority + */ + regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) & + QRXFLXP_CNTXT_RXDID_PRIO_M; + + wr32(hw, QRXFLXP_CNTXT(pf_q), regval); + + /* Absolute queue number out of 2K needs to be passed */ + err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); + if (err) { + dev_err(&vsi->back->pdev->dev, + "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", + pf_q, err); + return -EIO; + } + + /* init queue specific tail register */ + ring->tail = hw->hw_addr + QRX_TAIL(pf_q); + writel(0, ring->tail); + ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring)); + + return 0; +} + +/** + * ice_vsi_cfg_rxqs - Configure the VSI for Rx + * @vsi: the VSI being configured + * + * Return 0 on success and a negative value on error + * Configure the Rx VSI for operation. 
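+ * max_frame is derived from the netdev MTU (plus L2 overhead) when the
+ * MTU exceeds the standard Ethernet payload and falls back to
+ * ICE_RXBUF_2048 otherwise; rx_buf_len is always ICE_RXBUF_2048 here.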
+ */ +static int ice_vsi_cfg_rxqs(struct ice_vsi *vsi) +{ + int err = 0; + u16 i; + + if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN) + vsi->max_frame = vsi->netdev->mtu + + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + else + vsi->max_frame = ICE_RXBUF_2048; + + vsi->rx_buf_len = ICE_RXBUF_2048; + /* set up individual rings */ + for (i = 0; i < vsi->num_rxq && !err; i++) + err = ice_setup_rx_ctx(vsi->rx_rings[i]); + + if (err) { + dev_err(&vsi->back->pdev->dev, "ice_setup_rx_ctx failed\n"); + return -EIO; + } + return err; +} + +/** + * ice_vsi_cfg - Setup the VSI + * @vsi: the VSI being configured + * + * Return 0 on success and negative value on error + */ +static int ice_vsi_cfg(struct ice_vsi *vsi) +{ + int err; + + if (vsi->netdev) { + ice_set_rx_mode(vsi->netdev); + err = ice_restore_vlan(vsi); + if (err) + return err; + } + + err = ice_vsi_cfg_txqs(vsi); + if (!err) + err = ice_vsi_cfg_rxqs(vsi); + + return err; +} + +/** + * ice_vsi_stop_tx_rings - Disable Tx rings + * @vsi: the VSI being configured + */ +static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + u32 *q_teids, val; + u16 *q_ids, i; + int err = 0; + + if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS) + return -EINVAL; + + q_teids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_teids), + GFP_KERNEL); + if (!q_teids) + return -ENOMEM; + + q_ids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_ids), + GFP_KERNEL); + if (!q_ids) { + err = -ENOMEM; + goto err_alloc_q_ids; + } + + /* set up the tx queue list to be disabled */ + ice_for_each_txq(vsi, i) { + u16 v_idx; + + if (!vsi->tx_rings || !vsi->tx_rings[i]) { + err = -EINVAL; + goto err_out; + } + + q_ids[i] = vsi->txq_map[i]; + q_teids[i] = vsi->tx_rings[i]->txq_teid; + + /* clear cause_ena bit for disabled queues */ + val = rd32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx)); + val &= ~QINT_TQCTL_CAUSE_ENA_M; + wr32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val); + + /* software is expected to wait for 100 ns */ + ndelay(100); + + /* trigger a software interrupt for the vector associated to + * the queue to schedule napi handler + */ + v_idx = vsi->tx_rings[i]->q_vector->v_idx; + wr32(hw, GLINT_DYN_CTL(vsi->base_vector + v_idx), + GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M); + } + status = ice_dis_vsi_txq(vsi->port_info, vsi->num_txq, q_ids, q_teids, + NULL); + /* if the disable queue command was exercised during an active reset + * flow, ICE_ERR_RESET_ONGOING is returned. This is not an error as + * the reset operation disables queues at the hardware level anyway. + */ + if (status == ICE_ERR_RESET_ONGOING) { + dev_dbg(&pf->pdev->dev, + "Reset in progress. LAN Tx queues already disabled\n"); + } else if (status) { + dev_err(&pf->pdev->dev, + "Failed to disable LAN Tx queues, error: %d\n", + status); + err = -ENODEV; + } + +err_out: + devm_kfree(&pf->pdev->dev, q_ids); + +err_alloc_q_ids: + devm_kfree(&pf->pdev->dev, q_teids); + + return err; +} + +/** + * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled + * @pf: the PF being configured + * @pf_q: the PF queue + * @ena: enable or disable state of the queue + * + * This routine will wait for the given Rx queue of the PF to reach the + * enabled or disabled state. + * Returns -ETIMEDOUT in case of failing to reach the requested state after + * multiple retries; else will return 0 in case of success. 
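+ * Each attempt reads the queue's QRX_CTRL register and then sleeps for
+ * 10-20 microseconds, so at most ICE_Q_WAIT_RETRY_LIMIT polls are made
+ * before giving up.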
+ */ +static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) +{ + int i; + + for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) { + u32 rx_reg = rd32(&pf->hw, QRX_CTRL(pf_q)); + + if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) + break; + + usleep_range(10, 20); + } + if (i >= ICE_Q_WAIT_RETRY_LIMIT) + return -ETIMEDOUT; + + return 0; +} + +/** + * ice_vsi_ctrl_rx_rings - Start or stop a VSI's rx rings + * @vsi: the VSI being configured + * @ena: start or stop the rx rings + */ +static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int i, j, ret = 0; + + for (i = 0; i < vsi->num_rxq; i++) { + int pf_q = vsi->rxq_map[i]; + u32 rx_reg; + + for (j = 0; j < ICE_Q_WAIT_MAX_RETRY; j++) { + rx_reg = rd32(hw, QRX_CTRL(pf_q)); + if (((rx_reg >> QRX_CTRL_QENA_REQ_S) & 1) == + ((rx_reg >> QRX_CTRL_QENA_STAT_S) & 1)) + break; + usleep_range(1000, 2000); + } + + /* Skip if the queue is already in the requested state */ + if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) + continue; + + /* turn on/off the queue */ + if (ena) + rx_reg |= QRX_CTRL_QENA_REQ_M; + else + rx_reg &= ~QRX_CTRL_QENA_REQ_M; + wr32(hw, QRX_CTRL(pf_q), rx_reg); + + /* wait for the change to finish */ + ret = ice_pf_rxq_wait(pf, pf_q, ena); + if (ret) { + dev_err(&pf->pdev->dev, + "VSI idx %d Rx ring %d %sable timeout\n", + vsi->idx, pf_q, (ena ? "en" : "dis")); + break; + } + } + + return ret; +} + +/** + * ice_vsi_start_rx_rings - start VSI's rx rings + * @vsi: the VSI whose rings are to be started + * + * Returns 0 on success and a negative value on error + */ +static int ice_vsi_start_rx_rings(struct ice_vsi *vsi) +{ + return ice_vsi_ctrl_rx_rings(vsi, true); +} + +/** + * ice_vsi_stop_rx_rings - stop VSI's rx rings + * @vsi: the VSI + * + * Returns 0 on success and a negative value on error + */ +static int ice_vsi_stop_rx_rings(struct ice_vsi *vsi) +{ + return ice_vsi_ctrl_rx_rings(vsi, false); +} + +/** + * ice_vsi_stop_tx_rx_rings - stop VSI's tx and rx rings + * @vsi: the VSI + * Returns 0 on success and a negative value on error + */ +static int ice_vsi_stop_tx_rx_rings(struct ice_vsi *vsi) +{ + int err_tx, err_rx; + + err_tx = ice_vsi_stop_tx_rings(vsi); + if (err_tx) + dev_dbg(&vsi->back->pdev->dev, "Failed to disable Tx rings\n"); + + err_rx = ice_vsi_stop_rx_rings(vsi); + if (err_rx) + dev_dbg(&vsi->back->pdev->dev, "Failed to disable Rx rings\n"); + + if (err_tx || err_rx) + return -EIO; + + return 0; +} + +/** + * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI + * @vsi: the VSI being configured + */ +static void ice_napi_enable_all(struct ice_vsi *vsi) +{ + int q_idx; + + if (!vsi->netdev) + return; + + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_enable(&q_vector->napi); + } +} + +/** + * ice_up_complete - Finish the last steps of bringing up a connection + * @vsi: The VSI being configured + * + * Return 0 on success and negative value on error + */ +static int ice_up_complete(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int err; + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + ice_vsi_cfg_msix(vsi); + else + return -ENOTSUPP; + + /* Enable only Rx rings, Tx rings were enabled by the FW when the + * Tx queue group list was configured and the context bits were + * programmed using ice_vsi_cfg_txqs + */ + err = ice_vsi_start_rx_rings(vsi); + if (err) + return err; + + clear_bit(__ICE_DOWN, 
vsi->state); + ice_napi_enable_all(vsi); + ice_vsi_ena_irq(vsi); + + if (vsi->port_info && + (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) && + vsi->netdev) { + ice_print_link_msg(vsi, true); + netif_tx_start_all_queues(vsi->netdev); + netif_carrier_on(vsi->netdev); + } + + /* clear this now, and the first stats read will be used as baseline */ + vsi->stat_offsets_loaded = false; + + ice_service_task_schedule(pf); + + return err; +} + +/** + * ice_up - Bring the connection back up after being down + * @vsi: VSI being configured + */ +int ice_up(struct ice_vsi *vsi) +{ + int err; + + err = ice_vsi_cfg(vsi); + if (!err) + err = ice_up_complete(vsi); + + return err; +} + +/** + * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring + * @ring: Tx or Rx ring to read stats from + * @pkts: packets stats counter + * @bytes: bytes stats counter + * + * This function fetches stats from the ring considering the atomic operations + * that needs to be performed to read u64 values in 32 bit machine. + */ +static void ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, + u64 *bytes) +{ + unsigned int start; + *pkts = 0; + *bytes = 0; + + if (!ring) + return; + do { + start = u64_stats_fetch_begin_irq(&ring->syncp); + *pkts = ring->stats.pkts; + *bytes = ring->stats.bytes; + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); +} + +/** + * ice_stat_update40 - read 40 bit stat from the chip and update stat values + * @hw: ptr to the hardware info + * @hireg: high 32 bit HW register to read from + * @loreg: low 32 bit HW register to read from + * @prev_stat_loaded: bool to specify if previous stats are loaded + * @prev_stat: ptr to previous loaded stat value + * @cur_stat: ptr to current stat value + */ +static void ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg, + bool prev_stat_loaded, u64 *prev_stat, + u64 *cur_stat) +{ + u64 new_data; + + new_data = rd32(hw, loreg); + new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; + + /* device stats are not reset at PFR, they likely will not be zeroed + * when the driver starts. So save the first values read and use them as + * offsets to be subtracted from the raw values in order to report stats + * that count from zero. + */ + if (!prev_stat_loaded) + *prev_stat = new_data; + if (likely(new_data >= *prev_stat)) + *cur_stat = new_data - *prev_stat; + else + /* to manage the potential roll-over */ + *cur_stat = (new_data + BIT_ULL(40)) - *prev_stat; + *cur_stat &= 0xFFFFFFFFFFULL; +} + +/** + * ice_stat_update32 - read 32 bit stat from the chip and update stat values + * @hw: ptr to the hardware info + * @reg: HW register to read from + * @prev_stat_loaded: bool to specify if previous stats are loaded + * @prev_stat: ptr to previous loaded stat value + * @cur_stat: ptr to current stat value + */ +static void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, + u64 *prev_stat, u64 *cur_stat) +{ + u32 new_data; + + new_data = rd32(hw, reg); + + /* device stats are not reset at PFR, they likely will not be zeroed + * when the driver starts. So save the first values read and use them as + * offsets to be subtracted from the raw values in order to report stats + * that count from zero. 
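+ * The counter can also wrap between two reads; e.g. with
+ * *prev_stat == 0xFFFFFFF0 and new_data == 0x10 the rollover branch
+ * below reports (0x10 + 2^32) - 0xFFFFFFF0 = 0x20 new events.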
+ */ + if (!prev_stat_loaded) + *prev_stat = new_data; + if (likely(new_data >= *prev_stat)) + *cur_stat = new_data - *prev_stat; + else + /* to manage the potential roll-over */ + *cur_stat = (new_data + BIT_ULL(32)) - *prev_stat; +} + +/** + * ice_update_eth_stats - Update VSI-specific ethernet statistics counters + * @vsi: the VSI to be updated + */ +static void ice_update_eth_stats(struct ice_vsi *vsi) +{ + struct ice_eth_stats *prev_es, *cur_es; + struct ice_hw *hw = &vsi->back->hw; + u16 vsi_num = vsi->vsi_num; /* HW absolute index of a VSI */ + + prev_es = &vsi->eth_stats_prev; + cur_es = &vsi->eth_stats; + + ice_stat_update40(hw, GLV_GORCH(vsi_num), GLV_GORCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->rx_bytes, + &cur_es->rx_bytes); + + ice_stat_update40(hw, GLV_UPRCH(vsi_num), GLV_UPRCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->rx_unicast, + &cur_es->rx_unicast); + + ice_stat_update40(hw, GLV_MPRCH(vsi_num), GLV_MPRCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->rx_multicast, + &cur_es->rx_multicast); + + ice_stat_update40(hw, GLV_BPRCH(vsi_num), GLV_BPRCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->rx_broadcast, + &cur_es->rx_broadcast); + + ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded, + &prev_es->rx_discards, &cur_es->rx_discards); + + ice_stat_update40(hw, GLV_GOTCH(vsi_num), GLV_GOTCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->tx_bytes, + &cur_es->tx_bytes); + + ice_stat_update40(hw, GLV_UPTCH(vsi_num), GLV_UPTCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->tx_unicast, + &cur_es->tx_unicast); + + ice_stat_update40(hw, GLV_MPTCH(vsi_num), GLV_MPTCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->tx_multicast, + &cur_es->tx_multicast); + + ice_stat_update40(hw, GLV_BPTCH(vsi_num), GLV_BPTCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->tx_broadcast, + &cur_es->tx_broadcast); + + ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded, + &prev_es->tx_errors, &cur_es->tx_errors); + + vsi->stat_offsets_loaded = true; +} + +/** + * ice_update_vsi_ring_stats - Update VSI stats counters + * @vsi: the VSI to be updated + */ +static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) +{ + struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; + struct ice_ring *ring; + u64 pkts, bytes; + int i; + + /* reset netdev stats */ + vsi_stats->tx_packets = 0; + vsi_stats->tx_bytes = 0; + vsi_stats->rx_packets = 0; + vsi_stats->rx_bytes = 0; + + /* reset non-netdev (extended) stats */ + vsi->tx_restart = 0; + vsi->tx_busy = 0; + vsi->tx_linearize = 0; + vsi->rx_buf_failed = 0; + vsi->rx_page_failed = 0; + + rcu_read_lock(); + + /* update Tx rings counters */ + ice_for_each_txq(vsi, i) { + ring = READ_ONCE(vsi->tx_rings[i]); + ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); + vsi_stats->tx_packets += pkts; + vsi_stats->tx_bytes += bytes; + vsi->tx_restart += ring->tx_stats.restart_q; + vsi->tx_busy += ring->tx_stats.tx_busy; + vsi->tx_linearize += ring->tx_stats.tx_linearize; + } + + /* update Rx rings counters */ + ice_for_each_rxq(vsi, i) { + ring = READ_ONCE(vsi->rx_rings[i]); + ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); + vsi_stats->rx_packets += pkts; + vsi_stats->rx_bytes += bytes; + vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; + vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; + } + + rcu_read_unlock(); +} + +/** + * ice_update_vsi_stats - Update VSI stats counters + * @vsi: the VSI to be updated + */ +static void ice_update_vsi_stats(struct ice_vsi *vsi) +{ + struct rtnl_link_stats64 *cur_ns = 
&vsi->net_stats; + struct ice_eth_stats *cur_es = &vsi->eth_stats; + struct ice_pf *pf = vsi->back; + + if (test_bit(__ICE_DOWN, vsi->state) || + test_bit(__ICE_CFG_BUSY, pf->state)) + return; + + /* get stats as recorded by Tx/Rx rings */ + ice_update_vsi_ring_stats(vsi); + + /* get VSI stats as recorded by the hardware */ + ice_update_eth_stats(vsi); + + cur_ns->tx_errors = cur_es->tx_errors; + cur_ns->rx_dropped = cur_es->rx_discards; + cur_ns->tx_dropped = cur_es->tx_discards; + cur_ns->multicast = cur_es->rx_multicast; + + /* update some more netdev stats if this is main VSI */ + if (vsi->type == ICE_VSI_PF) { + cur_ns->rx_crc_errors = pf->stats.crc_errors; + cur_ns->rx_errors = pf->stats.crc_errors + + pf->stats.illegal_bytes; + cur_ns->rx_length_errors = pf->stats.rx_len_errors; + } +} + +/** + * ice_update_pf_stats - Update PF port stats counters + * @pf: PF whose stats needs to be updated + */ +static void ice_update_pf_stats(struct ice_pf *pf) +{ + struct ice_hw_port_stats *prev_ps, *cur_ps; + struct ice_hw *hw = &pf->hw; + u8 pf_id; + + prev_ps = &pf->stats_prev; + cur_ps = &pf->stats; + pf_id = hw->pf_id; + + ice_stat_update40(hw, GLPRT_GORCH(pf_id), GLPRT_GORCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.rx_bytes, + &cur_ps->eth.rx_bytes); + + ice_stat_update40(hw, GLPRT_UPRCH(pf_id), GLPRT_UPRCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.rx_unicast, + &cur_ps->eth.rx_unicast); + + ice_stat_update40(hw, GLPRT_MPRCH(pf_id), GLPRT_MPRCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.rx_multicast, + &cur_ps->eth.rx_multicast); + + ice_stat_update40(hw, GLPRT_BPRCH(pf_id), GLPRT_BPRCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.rx_broadcast, + &cur_ps->eth.rx_broadcast); + + ice_stat_update40(hw, GLPRT_GOTCH(pf_id), GLPRT_GOTCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.tx_bytes, + &cur_ps->eth.tx_bytes); + + ice_stat_update40(hw, GLPRT_UPTCH(pf_id), GLPRT_UPTCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.tx_unicast, + &cur_ps->eth.tx_unicast); + + ice_stat_update40(hw, GLPRT_MPTCH(pf_id), GLPRT_MPTCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.tx_multicast, + &cur_ps->eth.tx_multicast); + + ice_stat_update40(hw, GLPRT_BPTCH(pf_id), GLPRT_BPTCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.tx_broadcast, + &cur_ps->eth.tx_broadcast); + + ice_stat_update32(hw, GLPRT_TDOLD(pf_id), pf->stat_prev_loaded, + &prev_ps->tx_dropped_link_down, + &cur_ps->tx_dropped_link_down); + + ice_stat_update40(hw, GLPRT_PRC64H(pf_id), GLPRT_PRC64L(pf_id), + pf->stat_prev_loaded, &prev_ps->rx_size_64, + &cur_ps->rx_size_64); + + ice_stat_update40(hw, GLPRT_PRC127H(pf_id), GLPRT_PRC127L(pf_id), + pf->stat_prev_loaded, &prev_ps->rx_size_127, + &cur_ps->rx_size_127); + + ice_stat_update40(hw, GLPRT_PRC255H(pf_id), GLPRT_PRC255L(pf_id), + pf->stat_prev_loaded, &prev_ps->rx_size_255, + &cur_ps->rx_size_255); + + ice_stat_update40(hw, GLPRT_PRC511H(pf_id), GLPRT_PRC511L(pf_id), + pf->stat_prev_loaded, &prev_ps->rx_size_511, + &cur_ps->rx_size_511); + + ice_stat_update40(hw, GLPRT_PRC1023H(pf_id), + GLPRT_PRC1023L(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_size_1023, &cur_ps->rx_size_1023); + + ice_stat_update40(hw, GLPRT_PRC1522H(pf_id), + GLPRT_PRC1522L(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_size_1522, &cur_ps->rx_size_1522); + + ice_stat_update40(hw, GLPRT_PRC9522H(pf_id), + GLPRT_PRC9522L(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_size_big, &cur_ps->rx_size_big); + + ice_stat_update40(hw, GLPRT_PTC64H(pf_id), GLPRT_PTC64L(pf_id), + pf->stat_prev_loaded, &prev_ps->tx_size_64, + 
&cur_ps->tx_size_64); + + ice_stat_update40(hw, GLPRT_PTC127H(pf_id), GLPRT_PTC127L(pf_id), + pf->stat_prev_loaded, &prev_ps->tx_size_127, + &cur_ps->tx_size_127); + + ice_stat_update40(hw, GLPRT_PTC255H(pf_id), GLPRT_PTC255L(pf_id), + pf->stat_prev_loaded, &prev_ps->tx_size_255, + &cur_ps->tx_size_255); + + ice_stat_update40(hw, GLPRT_PTC511H(pf_id), GLPRT_PTC511L(pf_id), + pf->stat_prev_loaded, &prev_ps->tx_size_511, + &cur_ps->tx_size_511); + + ice_stat_update40(hw, GLPRT_PTC1023H(pf_id), + GLPRT_PTC1023L(pf_id), pf->stat_prev_loaded, + &prev_ps->tx_size_1023, &cur_ps->tx_size_1023); + + ice_stat_update40(hw, GLPRT_PTC1522H(pf_id), + GLPRT_PTC1522L(pf_id), pf->stat_prev_loaded, + &prev_ps->tx_size_1522, &cur_ps->tx_size_1522); + + ice_stat_update40(hw, GLPRT_PTC9522H(pf_id), + GLPRT_PTC9522L(pf_id), pf->stat_prev_loaded, + &prev_ps->tx_size_big, &cur_ps->tx_size_big); + + ice_stat_update32(hw, GLPRT_LXONRXC(pf_id), pf->stat_prev_loaded, + &prev_ps->link_xon_rx, &cur_ps->link_xon_rx); + + ice_stat_update32(hw, GLPRT_LXOFFRXC(pf_id), pf->stat_prev_loaded, + &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx); + + ice_stat_update32(hw, GLPRT_LXONTXC(pf_id), pf->stat_prev_loaded, + &prev_ps->link_xon_tx, &cur_ps->link_xon_tx); + + ice_stat_update32(hw, GLPRT_LXOFFTXC(pf_id), pf->stat_prev_loaded, + &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx); + + ice_stat_update32(hw, GLPRT_CRCERRS(pf_id), pf->stat_prev_loaded, + &prev_ps->crc_errors, &cur_ps->crc_errors); + + ice_stat_update32(hw, GLPRT_ILLERRC(pf_id), pf->stat_prev_loaded, + &prev_ps->illegal_bytes, &cur_ps->illegal_bytes); + + ice_stat_update32(hw, GLPRT_MLFC(pf_id), pf->stat_prev_loaded, + &prev_ps->mac_local_faults, + &cur_ps->mac_local_faults); + + ice_stat_update32(hw, GLPRT_MRFC(pf_id), pf->stat_prev_loaded, + &prev_ps->mac_remote_faults, + &cur_ps->mac_remote_faults); + + ice_stat_update32(hw, GLPRT_RLEC(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_len_errors, &cur_ps->rx_len_errors); + + ice_stat_update32(hw, GLPRT_RUC(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_undersize, &cur_ps->rx_undersize); + + ice_stat_update32(hw, GLPRT_RFC(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_fragments, &cur_ps->rx_fragments); + + ice_stat_update32(hw, GLPRT_ROC(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_oversize, &cur_ps->rx_oversize); + + ice_stat_update32(hw, GLPRT_RJC(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_jabber, &cur_ps->rx_jabber); + + pf->stat_prev_loaded = true; +} + +/** + * ice_get_stats64 - get statistics for network device structure + * @netdev: network interface device structure + * @stats: main device statistics structure + */ +static +void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct rtnl_link_stats64 *vsi_stats; + struct ice_vsi *vsi = np->vsi; + + vsi_stats = &vsi->net_stats; + + if (test_bit(__ICE_DOWN, vsi->state) || !vsi->num_txq || !vsi->num_rxq) + return; + /* netdev packet/byte stats come from ring counter. These are obtained + * by summing up ring counters (done by ice_update_vsi_ring_stats). + */ + ice_update_vsi_ring_stats(vsi); + stats->tx_packets = vsi_stats->tx_packets; + stats->tx_bytes = vsi_stats->tx_bytes; + stats->rx_packets = vsi_stats->rx_packets; + stats->rx_bytes = vsi_stats->rx_bytes; + + /* The rest of the stats can be read from the hardware but instead we + * just return values that the watchdog task has already obtained from + * the hardware. 
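+ * (Those cached values are the ones that ice_update_vsi_stats() fills
+ * into vsi->net_stats from the VSI and port hardware counters.)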
+ */ + stats->multicast = vsi_stats->multicast; + stats->tx_errors = vsi_stats->tx_errors; + stats->tx_dropped = vsi_stats->tx_dropped; + stats->rx_errors = vsi_stats->rx_errors; + stats->rx_dropped = vsi_stats->rx_dropped; + stats->rx_crc_errors = vsi_stats->rx_crc_errors; + stats->rx_length_errors = vsi_stats->rx_length_errors; +} + +/** + * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI + * @vsi: VSI having NAPI disabled + */ +static void ice_napi_disable_all(struct ice_vsi *vsi) +{ + int q_idx; + + if (!vsi->netdev) + return; + + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_disable(&q_vector->napi); + } +} + +/** + * ice_down - Shutdown the connection + * @vsi: The VSI being stopped + */ +int ice_down(struct ice_vsi *vsi) +{ + int i, err; + + /* Caller of this function is expected to set the + * vsi->state __ICE_DOWN bit + */ + if (vsi->netdev) { + netif_carrier_off(vsi->netdev); + netif_tx_disable(vsi->netdev); + } + + ice_vsi_dis_irq(vsi); + err = ice_vsi_stop_tx_rx_rings(vsi); + ice_napi_disable_all(vsi); + + ice_for_each_txq(vsi, i) + ice_clean_tx_ring(vsi->tx_rings[i]); + + ice_for_each_rxq(vsi, i) + ice_clean_rx_ring(vsi->rx_rings[i]); + + if (err) + netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n", + vsi->vsi_num, vsi->vsw->sw_id); + return err; +} + +/** + * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources + * @vsi: VSI having resources allocated + * + * Return 0 on success, negative on failure + */ +static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) +{ + int i, err = 0; + + if (!vsi->num_txq) { + dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n", + vsi->vsi_num); + return -EINVAL; + } + + ice_for_each_txq(vsi, i) { + err = ice_setup_tx_ring(vsi->tx_rings[i]); + if (err) + break; + } + + return err; +} + +/** + * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources + * @vsi: VSI having resources allocated + * + * Return 0 on success, negative on failure + */ +static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) +{ + int i, err = 0; + + if (!vsi->num_rxq) { + dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n", + vsi->vsi_num); + return -EINVAL; + } + + ice_for_each_rxq(vsi, i) { + err = ice_setup_rx_ring(vsi->rx_rings[i]); + if (err) + break; + } + + return err; +} + +/** + * ice_vsi_req_irq - Request IRQ from the OS + * @vsi: The VSI IRQ is being requested for + * @basename: name for the vector + * + * Return 0 on success and a negative value on error + */ +static int ice_vsi_req_irq(struct ice_vsi *vsi, char *basename) +{ + struct ice_pf *pf = vsi->back; + int err = -EINVAL; + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + err = ice_vsi_req_irq_msix(vsi, basename); + + return err; +} + +/** + * ice_vsi_free_tx_rings - Free Tx resources for VSI queues + * @vsi: the VSI having resources freed + */ +static void ice_vsi_free_tx_rings(struct ice_vsi *vsi) +{ + int i; + + if (!vsi->tx_rings) + return; + + ice_for_each_txq(vsi, i) + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) + ice_free_tx_ring(vsi->tx_rings[i]); +} + +/** + * ice_vsi_free_rx_rings - Free Rx resources for VSI queues + * @vsi: the VSI having resources freed + */ +static void ice_vsi_free_rx_rings(struct ice_vsi *vsi) +{ + int i; + + if (!vsi->rx_rings) + return; + + ice_for_each_rxq(vsi, i) + if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc) + ice_free_rx_ring(vsi->rx_rings[i]); +} + +/** + * ice_vsi_open - Called when a 
network interface is made active + * @vsi: the VSI to open + * + * Initialization of the VSI + * + * Returns 0 on success, negative value on error + */ +static int ice_vsi_open(struct ice_vsi *vsi) +{ + char int_name[ICE_INT_NAME_STR_LEN]; + struct ice_pf *pf = vsi->back; + int err; + + /* allocate descriptors */ + err = ice_vsi_setup_tx_rings(vsi); + if (err) + goto err_setup_tx; + + err = ice_vsi_setup_rx_rings(vsi); + if (err) + goto err_setup_rx; + + err = ice_vsi_cfg(vsi); + if (err) + goto err_setup_rx; + + snprintf(int_name, sizeof(int_name) - 1, "%s-%s", + dev_driver_string(&pf->pdev->dev), vsi->netdev->name); + err = ice_vsi_req_irq(vsi, int_name); + if (err) + goto err_setup_rx; + + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq); + if (err) + goto err_set_qs; + + err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq); + if (err) + goto err_set_qs; + + err = ice_up_complete(vsi); + if (err) + goto err_up_complete; + + return 0; + +err_up_complete: + ice_down(vsi); +err_set_qs: + ice_vsi_free_irq(vsi); +err_setup_rx: + ice_vsi_free_rx_rings(vsi); +err_setup_tx: + ice_vsi_free_tx_rings(vsi); + + return err; +} + +/** + * ice_vsi_close - Shut down a VSI + * @vsi: the VSI being shut down + */ +static void ice_vsi_close(struct ice_vsi *vsi) +{ + if (!test_and_set_bit(__ICE_DOWN, vsi->state)) + ice_down(vsi); + + ice_vsi_free_irq(vsi); + ice_vsi_free_tx_rings(vsi); + ice_vsi_free_rx_rings(vsi); +} + +/** + * ice_rss_clean - Delete RSS related VSI structures that hold user inputs + * @vsi: the VSI being removed + */ +static void ice_rss_clean(struct ice_vsi *vsi) +{ + struct ice_pf *pf; + + pf = vsi->back; + + if (vsi->rss_hkey_user) + devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user); + if (vsi->rss_lut_user) + devm_kfree(&pf->pdev->dev, vsi->rss_lut_user); +} + +/** + * ice_vsi_release - Delete a VSI and free its resources + * @vsi: the VSI being removed + * + * Returns 0 on success or < 0 on error + */ +static int ice_vsi_release(struct ice_vsi *vsi) +{ + struct ice_pf *pf; + + if (!vsi->back) + return -ENODEV; + pf = vsi->back; + + if (vsi->netdev) { + unregister_netdev(vsi->netdev); + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } + + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + ice_rss_clean(vsi); + + /* Disable VSI and free resources */ + ice_vsi_dis_irq(vsi); + ice_vsi_close(vsi); + + /* reclaim interrupt vectors back to PF */ + ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx); + pf->num_avail_msix += vsi->num_q_vectors; + + ice_remove_vsi_fltr(&pf->hw, vsi->vsi_num); + ice_vsi_delete(vsi); + ice_vsi_free_q_vectors(vsi); + ice_vsi_clear_rings(vsi); + + ice_vsi_put_qs(vsi); + pf->q_left_tx += vsi->alloc_txq; + pf->q_left_rx += vsi->alloc_rxq; + + ice_vsi_clear(vsi); + + return 0; +} + +/** + * ice_dis_vsi - pause a VSI + * @vsi: the VSI being paused + */ +static void ice_dis_vsi(struct ice_vsi *vsi) +{ + if (test_bit(__ICE_DOWN, vsi->state)) + return; + + set_bit(__ICE_NEEDS_RESTART, vsi->state); + + if (vsi->netdev && netif_running(vsi->netdev) && + vsi->type == ICE_VSI_PF) + vsi->netdev->netdev_ops->ndo_stop(vsi->netdev); + + ice_vsi_close(vsi); +} + +/** + * ice_ena_vsi - resume a VSI + * @vsi: the VSI being resume + */ +static void ice_ena_vsi(struct ice_vsi *vsi) +{ + if (!test_and_clear_bit(__ICE_NEEDS_RESTART, vsi->state)) + return; + + if (vsi->netdev && netif_running(vsi->netdev)) + vsi->netdev->netdev_ops->ndo_open(vsi->netdev); + else if (ice_vsi_open(vsi)) + /* this clears the 
DOWN bit */ + dev_dbg(&vsi->back->pdev->dev, "Failed open VSI 0x%04X on switch 0x%04X\n", + vsi->vsi_num, vsi->vsw->sw_id); +} + +/** + * ice_pf_dis_all_vsi - Pause all VSIs on a PF + * @pf: the PF + */ +static void ice_pf_dis_all_vsi(struct ice_pf *pf) +{ + int v; + + ice_for_each_vsi(pf, v) + if (pf->vsi[v]) + ice_dis_vsi(pf->vsi[v]); +} + +/** + * ice_pf_ena_all_vsi - Resume all VSIs on a PF + * @pf: the PF + */ +static void ice_pf_ena_all_vsi(struct ice_pf *pf) +{ + int v; + + ice_for_each_vsi(pf, v) + if (pf->vsi[v]) + ice_ena_vsi(pf->vsi[v]); +} + +/** + * ice_rebuild - rebuild after reset + * @pf: pf to rebuild + */ +static void ice_rebuild(struct ice_pf *pf) +{ + struct device *dev = &pf->pdev->dev; + struct ice_hw *hw = &pf->hw; + enum ice_status ret; + int err; + + if (test_bit(__ICE_DOWN, pf->state)) + goto clear_recovery; + + dev_dbg(dev, "rebuilding pf\n"); + + ret = ice_init_all_ctrlq(hw); + if (ret) { + dev_err(dev, "control queues init failed %d\n", ret); + goto fail_reset; + } + + ret = ice_clear_pf_cfg(hw); + if (ret) { + dev_err(dev, "clear PF configuration failed %d\n", ret); + goto fail_reset; + } + + ice_clear_pxe_mode(hw); + + ret = ice_get_caps(hw); + if (ret) { + dev_err(dev, "ice_get_caps failed %d\n", ret); + goto fail_reset; + } + + /* basic nic switch setup */ + err = ice_setup_pf_sw(pf); + if (err) { + dev_err(dev, "ice_setup_pf_sw failed\n"); + goto fail_reset; + } + + /* start misc vector */ + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + err = ice_req_irq_msix_misc(pf); + if (err) { + dev_err(dev, "misc vector setup failed: %d\n", err); + goto fail_reset; + } + } + + /* restart the VSIs that were rebuilt and running before the reset */ + ice_pf_ena_all_vsi(pf); + + return; + +fail_reset: + ice_shutdown_all_ctrlq(hw); + set_bit(__ICE_RESET_FAILED, pf->state); +clear_recovery: + set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); +} + +/** + * ice_change_mtu - NDO callback to change the MTU + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size + * + * Returns 0 on success, negative on failure + */ +static int ice_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u8 count = 0; + + if (new_mtu == netdev->mtu) { + netdev_warn(netdev, "mtu is already %u\n", netdev->mtu); + return 0; + } + + if (new_mtu < netdev->min_mtu) { + netdev_err(netdev, "new mtu invalid. min_mtu is %d\n", + netdev->min_mtu); + return -EINVAL; + } else if (new_mtu > netdev->max_mtu) { + netdev_err(netdev, "new mtu invalid. max_mtu is %d\n", + netdev->min_mtu); + return -EINVAL; + } + /* if a reset is in progress, wait for some time for it to complete */ + do { + if (ice_is_reset_recovery_pending(pf->state)) { + count++; + usleep_range(1000, 2000); + } else { + break; + } + + } while (count < 100); + + if (count == 100) { + netdev_err(netdev, "can't change mtu. 
Device is busy\n"); + return -EBUSY; + } + + netdev->mtu = new_mtu; + + /* if VSI is up, bring it down and then back up */ + if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + int err; + + err = ice_down(vsi); + if (err) { + netdev_err(netdev, "change mtu if_up err %d\n", err); + return err; + } + + err = ice_up(vsi); + if (err) { + netdev_err(netdev, "change mtu if_up err %d\n", err); + return err; + } + } + + netdev_dbg(netdev, "changed mtu to %d\n", new_mtu); + return 0; +} + +/** + * ice_set_rss - Set RSS keys and lut + * @vsi: Pointer to VSI structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + */ +int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + + if (seed) { + struct ice_aqc_get_set_rss_keys *buf = + (struct ice_aqc_get_set_rss_keys *)seed; + + status = ice_aq_set_rss_key(hw, vsi->vsi_num, buf); + + if (status) { + dev_err(&pf->pdev->dev, + "Cannot set RSS key, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + return -EIO; + } + } + + if (lut) { + status = ice_aq_set_rss_lut(hw, vsi->vsi_num, + vsi->rss_lut_type, lut, lut_size); + if (status) { + dev_err(&pf->pdev->dev, + "Cannot set RSS lut, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + return -EIO; + } + } + + return 0; +} + +/** + * ice_get_rss - Get RSS keys and lut + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the keys + * @lut: Buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries + * + * Returns 0 on success, negative on failure + */ +int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + + if (seed) { + struct ice_aqc_get_set_rss_keys *buf = + (struct ice_aqc_get_set_rss_keys *)seed; + + status = ice_aq_get_rss_key(hw, vsi->vsi_num, buf); + if (status) { + dev_err(&pf->pdev->dev, + "Cannot get RSS key, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + return -EIO; + } + } + + if (lut) { + status = ice_aq_get_rss_lut(hw, vsi->vsi_num, + vsi->rss_lut_type, lut, lut_size); + if (status) { + dev_err(&pf->pdev->dev, + "Cannot get RSS lut, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + return -EIO; + } + } + + return 0; +} + +/** + * ice_open - Called when a network interface becomes active + * @netdev: network interface device structure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the netdev watchdog is enabled, + * and the stack is notified that the interface is ready. + * + * Returns 0 on success, negative value on failure + */ +static int ice_open(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + int err; + + netif_carrier_off(netdev); + + err = ice_vsi_open(vsi); + + if (err) + netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n", + vsi->vsi_num, vsi->vsw->sw_id); + return err; +} + +/** + * ice_stop - Disables a network interface + * @netdev: network interface device structure + * + * The stop entry point is called when an interface is de-activated by the OS, + * and the netdevice enters the DOWN state. 
The hardware is still under the + * driver's control, but the netdev interface is disabled. + * + * Returns success only - not allowed to fail + */ +static int ice_stop(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + ice_vsi_close(vsi); + + return 0; +} + +/** + * ice_features_check - Validate encapsulated packet conforms to limits + * @skb: skb buffer + * @netdev: This port's netdev + * @features: Offload features that the stack believes apply + */ +static netdev_features_t +ice_features_check(struct sk_buff *skb, + struct net_device __always_unused *netdev, + netdev_features_t features) +{ + size_t len; + + /* No point in doing any of this if neither checksum nor GSO are + * being requested for this frame. We can rule out both by just + * checking for CHECKSUM_PARTIAL + */ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return features; + + /* We cannot support GSO if the MSS is going to be less than + * 64 bytes. If it is then we need to drop support for GSO. + */ + if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64)) + features &= ~NETIF_F_GSO_MASK; + + len = skb_network_header(skb) - skb->data; + if (len & ~(ICE_TXD_MACLEN_MAX)) + goto out_rm_features; + + len = skb_transport_header(skb) - skb_network_header(skb); + if (len & ~(ICE_TXD_IPLEN_MAX)) + goto out_rm_features; + + if (skb->encapsulation) { + len = skb_inner_network_header(skb) - skb_transport_header(skb); + if (len & ~(ICE_TXD_L4LEN_MAX)) + goto out_rm_features; + + len = skb_inner_transport_header(skb) - + skb_inner_network_header(skb); + if (len & ~(ICE_TXD_IPLEN_MAX)) + goto out_rm_features; + } + + return features; +out_rm_features: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + +static const struct net_device_ops ice_netdev_ops = { + .ndo_open = ice_open, + .ndo_stop = ice_stop, + .ndo_start_xmit = ice_start_xmit, + .ndo_features_check = ice_features_check, + .ndo_set_rx_mode = ice_set_rx_mode, + .ndo_set_mac_address = ice_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ice_change_mtu, + .ndo_get_stats64 = ice_get_stats64, + .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, + .ndo_set_features = ice_set_features, + .ndo_fdb_add = ice_fdb_add, + .ndo_fdb_del = ice_fdb_del, +}; diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c new file mode 100644 index 000000000..3274c5432 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_common.h" + +/** + * ice_aq_read_nvm + * @hw: pointer to the hw struct + * @module_typeid: module pointer location in words from the NVM beginning + * @offset: byte offset from the module beginning + * @length: length of the section to be read (in bytes from the offset) + * @data: command buffer (size [bytes] = length) + * @last_command: tells if this is the last command in a series + * @cd: pointer to command details structure or NULL + * + * Read the NVM using the admin queue commands (0x0701) + */ +static enum ice_status +ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length, + void *data, bool last_command, struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc; + struct ice_aqc_nvm *cmd; + + cmd = &desc.params.nvm; + + /* In offset the highest byte must be zeroed. 
*/ + if (offset & 0xFF000000) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_read); + + /* If this is the last command in a series, set the proper flag. */ + if (last_command) + cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD; + cmd->module_typeid = cpu_to_le16(module_typeid); + cmd->offset_low = cpu_to_le16(offset & 0xFFFF); + cmd->offset_high = (offset >> 16) & 0xFF; + cmd->length = cpu_to_le16(length); + + return ice_aq_send_cmd(hw, &desc, data, length, cd); +} + +/** + * ice_check_sr_access_params - verify params for Shadow RAM R/W operations. + * @hw: pointer to the HW structure + * @offset: offset in words from module start + * @words: number of words to access + */ +static enum ice_status +ice_check_sr_access_params(struct ice_hw *hw, u32 offset, u16 words) +{ + if ((offset + words) > hw->nvm.sr_words) { + ice_debug(hw, ICE_DBG_NVM, + "NVM error: offset beyond SR lmt.\n"); + return ICE_ERR_PARAM; + } + + if (words > ICE_SR_SECTOR_SIZE_IN_WORDS) { + /* We can access only up to 4KB (one sector), in one AQ write */ + ice_debug(hw, ICE_DBG_NVM, + "NVM error: tried to access %d words, limit is %d.\n", + words, ICE_SR_SECTOR_SIZE_IN_WORDS); + return ICE_ERR_PARAM; + } + + if (((offset + (words - 1)) / ICE_SR_SECTOR_SIZE_IN_WORDS) != + (offset / ICE_SR_SECTOR_SIZE_IN_WORDS)) { + /* A single access cannot spread over two sectors */ + ice_debug(hw, ICE_DBG_NVM, + "NVM error: cannot spread over two sectors.\n"); + return ICE_ERR_PARAM; + } + + return 0; +} + +/** + * ice_read_sr_aq - Read Shadow RAM. + * @hw: pointer to the HW structure + * @offset: offset in words from module start + * @words: number of words to read + * @data: buffer for words reads from Shadow RAM + * @last_command: tells the AdminQ that this is the last command + * + * Reads 16-bit word buffers from the Shadow RAM using the admin command. + */ +static enum ice_status +ice_read_sr_aq(struct ice_hw *hw, u32 offset, u16 words, u16 *data, + bool last_command) +{ + enum ice_status status; + + status = ice_check_sr_access_params(hw, offset, words); + + /* values in "offset" and "words" parameters are sized as words + * (16 bits) but ice_aq_read_nvm expects these values in bytes. + * So do this conversion while calling ice_aq_read_nvm. + */ + if (!status) + status = ice_aq_read_nvm(hw, 0, 2 * offset, 2 * words, data, + last_command, NULL); + + return status; +} + +/** + * ice_read_sr_word_aq - Reads Shadow RAM via AQ + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) + * @data: word read from the Shadow RAM + * + * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_aq method. + */ +static enum ice_status +ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data) +{ + enum ice_status status; + + status = ice_read_sr_aq(hw, offset, 1, data, true); + if (!status) + *data = le16_to_cpu(*(__le16 *)data); + + return status; +} + +/** + * ice_acquire_nvm - Generic request for acquiring the NVM ownership + * @hw: pointer to the HW structure + * @access: NVM access type (read or write) + * + * This function will request NVM ownership. + */ +static enum ice_status +ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access) +{ + if (hw->nvm.blank_nvm_mode) + return 0; + + return ice_acquire_res(hw, ICE_NVM_RES_ID, access, ICE_NVM_TIMEOUT); +} + +/** + * ice_release_nvm - Generic request for releasing the NVM ownership + * @hw: pointer to the HW structure + * + * This function will release NVM ownership. 
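+ * It is a no-op when the NVM is in blank programming mode.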
+ */ +static void ice_release_nvm(struct ice_hw *hw) +{ + if (hw->nvm.blank_nvm_mode) + return; + + ice_release_res(hw, ICE_NVM_RES_ID); +} + +/** + * ice_read_sr_word - Reads Shadow RAM word and acquire NVM if necessary + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) + * @data: word read from the Shadow RAM + * + * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq. + */ +static enum ice_status +ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data) +{ + enum ice_status status; + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (!status) { + status = ice_read_sr_word_aq(hw, offset, data); + ice_release_nvm(hw); + } + + return status; +} + +/** + * ice_init_nvm - initializes NVM setting + * @hw: pointer to the hw struct + * + * This function reads and populates NVM settings such as Shadow RAM size, + * max_timeout, and blank_nvm_mode + */ +enum ice_status ice_init_nvm(struct ice_hw *hw) +{ + struct ice_nvm_info *nvm = &hw->nvm; + u16 eetrack_lo, eetrack_hi; + enum ice_status status = 0; + u32 fla, gens_stat; + u8 sr_size; + + /* The SR size is stored regardless of the nvm programming mode + * as the blank mode may be used in the factory line. + */ + gens_stat = rd32(hw, GLNVM_GENS); + sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> GLNVM_GENS_SR_SIZE_S; + + /* Switching to words (sr_size contains power of 2) */ + nvm->sr_words = BIT(sr_size) * ICE_SR_WORDS_IN_1KB; + + /* Check if we are in the normal or blank NVM programming mode */ + fla = rd32(hw, GLNVM_FLA); + if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */ + nvm->blank_nvm_mode = false; + } else { /* Blank programming mode */ + nvm->blank_nvm_mode = true; + status = ICE_ERR_NVM_BLANK_MODE; + ice_debug(hw, ICE_DBG_NVM, + "NVM init error: unsupported blank mode.\n"); + return status; + } + + status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &hw->nvm.ver); + if (status) { + ice_debug(hw, ICE_DBG_INIT, + "Failed to read DEV starter version.\n"); + return status; + } + + status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_LO, &eetrack_lo); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK lo.\n"); + return status; + } + status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_HI, &eetrack_hi); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK hi.\n"); + return status; + } + + hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo; + + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h new file mode 100644 index 000000000..f57c414bc --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_osdep.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_OSDEP_H_ +#define _ICE_OSDEP_H_ + +#include <linux/types.h> +#include <linux/io.h> +#ifndef CONFIG_64BIT +#include <linux/io-64-nonatomic-lo-hi.h> +#endif + +#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) +#define rd32(a, reg) readl((a)->hw_addr + (reg)) +#define wr64(a, reg, value) writeq((value), ((a)->hw_addr + (reg))) +#define rd64(a, reg) readq((a)->hw_addr + (reg)) + +#define ice_flush(a) rd32((a), GLGEN_STAT) +#define ICE_M(m, s) ((m) << (s)) + +struct ice_dma_mem { + void *va; + dma_addr_t pa; + size_t size; +}; + +#define ice_hw_to_dev(ptr) \ + (&(container_of((ptr), struct ice_pf, hw))->pdev->dev) + +#ifdef CONFIG_DYNAMIC_DEBUG +#define ice_debug(hw, type, fmt, args...) 
\ + dev_dbg(ice_hw_to_dev(hw), fmt, ##args) + +#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \ + print_hex_dump_debug(KBUILD_MODNAME " ", \ + DUMP_PREFIX_OFFSET, rowsize, \ + groupsize, buf, len, false) +#else +#define ice_debug(hw, type, fmt, args...) \ +do { \ + if ((type) & (hw)->debug_mask) \ + dev_info(ice_hw_to_dev(hw), fmt, ##args); \ +} while (0) + +#ifdef DEBUG +#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \ +do { \ + if ((type) & (hw)->debug_mask) \ + print_hex_dump_debug(KBUILD_MODNAME, \ + DUMP_PREFIX_OFFSET, \ + rowsize, groupsize, buf, \ + len, false); \ +} while (0) +#else +#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \ +do { \ + struct ice_hw *hw_l = hw; \ + if ((type) & (hw_l)->debug_mask) { \ + u16 len_l = len; \ + u8 *buf_l = buf; \ + int i; \ + for (i = 0; i < (len_l - 16); i += 16) \ + ice_debug(hw_l, type, "0x%04X %16ph\n",\ + i, ((buf_l) + i)); \ + if (i < len_l) \ + ice_debug(hw_l, type, "0x%04X %*ph\n", \ + i, ((len_l) - i), ((buf_l) + i));\ + } \ +} while (0) +#endif /* DEBUG */ +#endif /* CONFIG_DYNAMIC_DEBUG */ + +#endif /* _ICE_OSDEP_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c new file mode 100644 index 000000000..eeae19946 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -0,0 +1,1658 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_sched.h" + +/** + * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB + * @pi: port information structure + * @info: Scheduler element information from firmware + * + * This function inserts the root node of the scheduling tree topology + * to the SW DB. + */ +static enum ice_status +ice_sched_add_root_node(struct ice_port_info *pi, + struct ice_aqc_txsched_elem_data *info) +{ + struct ice_sched_node *root; + struct ice_hw *hw; + u16 max_children; + + if (!pi) + return ICE_ERR_PARAM; + + hw = pi->hw; + + root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL); + if (!root) + return ICE_ERR_NO_MEMORY; + + max_children = le16_to_cpu(hw->layer_info[0].max_children); + root->children = devm_kcalloc(ice_hw_to_dev(hw), max_children, + sizeof(*root), GFP_KERNEL); + if (!root->children) { + devm_kfree(ice_hw_to_dev(hw), root); + return ICE_ERR_NO_MEMORY; + } + + memcpy(&root->info, info, sizeof(*info)); + pi->root = root; + return 0; +} + +/** + * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB + * @start_node: pointer to the starting ice_sched_node struct in a sub-tree + * @teid: node teid to search + * + * This function searches for a node matching the teid in the scheduling tree + * from the SW DB. The search is recursive and is restricted by the number of + * layers it has searched through; stopping at the max supported layer. 
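+ * Leaf nodes are not descended into, as they cannot have children.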
+ * + * This function needs to be called when holding the port_info->sched_lock + */ +struct ice_sched_node * +ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid) +{ + u16 i; + + /* The TEID is same as that of the start_node */ + if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid) + return start_node; + + /* The node has no children or is at the max layer */ + if (!start_node->num_children || + start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM || + start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) + return NULL; + + /* Check if teid matches to any of the children nodes */ + for (i = 0; i < start_node->num_children; i++) + if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid) + return start_node->children[i]; + + /* Search within each child's sub-tree */ + for (i = 0; i < start_node->num_children; i++) { + struct ice_sched_node *tmp; + + tmp = ice_sched_find_node_by_teid(start_node->children[i], + teid); + if (tmp) + return tmp; + } + + return NULL; +} + +/** + * ice_sched_add_node - Insert the Tx scheduler node in SW DB + * @pi: port information structure + * @layer: Scheduler layer of the node + * @info: Scheduler element information from firmware + * + * This function inserts a scheduler node to the SW DB. + */ +enum ice_status +ice_sched_add_node(struct ice_port_info *pi, u8 layer, + struct ice_aqc_txsched_elem_data *info) +{ + struct ice_sched_node *parent; + struct ice_sched_node *node; + struct ice_hw *hw; + u16 max_children; + + if (!pi) + return ICE_ERR_PARAM; + + hw = pi->hw; + + /* A valid parent node should be there */ + parent = ice_sched_find_node_by_teid(pi->root, + le32_to_cpu(info->parent_teid)); + if (!parent) { + ice_debug(hw, ICE_DBG_SCHED, + "Parent Node not found for parent_teid=0x%x\n", + le32_to_cpu(info->parent_teid)); + return ICE_ERR_PARAM; + } + + node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL); + if (!node) + return ICE_ERR_NO_MEMORY; + max_children = le16_to_cpu(hw->layer_info[layer].max_children); + if (max_children) { + node->children = devm_kcalloc(ice_hw_to_dev(hw), max_children, + sizeof(*node), GFP_KERNEL); + if (!node->children) { + devm_kfree(ice_hw_to_dev(hw), node); + return ICE_ERR_NO_MEMORY; + } + } + + node->in_use = true; + node->parent = parent; + node->tx_sched_layer = layer; + parent->children[parent->num_children++] = node; + memcpy(&node->info, info, sizeof(*info)); + return 0; +} + +/** + * ice_aq_delete_sched_elems - delete scheduler elements + * @hw: pointer to the hw struct + * @grps_req: number of groups to delete + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @grps_del: returns total number of elements deleted + * @cd: pointer to command details structure or NULL + * + * Delete scheduling elements (0x040F) + */ +static enum ice_status +ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req, + struct ice_aqc_delete_elem *buf, u16 buf_size, + u16 *grps_del, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_move_delete_elem *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.add_move_delete_elem; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_delete_sched_elems); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + cmd->num_grps_req = cpu_to_le16(grps_req); + + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status && grps_del) + *grps_del = le16_to_cpu(cmd->num_grps_updated); + + return status; +} + +/** + * ice_sched_remove_elems - remove nodes from hw + * @hw: pointer to the hw struct + * @parent: pointer to the 
parent node + * @num_nodes: number of nodes + * @node_teids: array of node teids to be deleted + * + * This function remove nodes from hw + */ +static enum ice_status +ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent, + u16 num_nodes, u32 *node_teids) +{ + struct ice_aqc_delete_elem *buf; + u16 i, num_groups_removed = 0; + enum ice_status status; + u16 buf_size; + + buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1); + buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + buf->hdr.parent_teid = parent->info.node_teid; + buf->hdr.num_elems = cpu_to_le16(num_nodes); + for (i = 0; i < num_nodes; i++) + buf->teid[i] = cpu_to_le32(node_teids[i]); + status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size, + &num_groups_removed, NULL); + if (status || num_groups_removed != 1) + ice_debug(hw, ICE_DBG_SCHED, "remove elements failed\n"); + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + +/** + * ice_sched_get_first_node - get the first node of the given layer + * @hw: pointer to the hw struct + * @parent: pointer the base node of the subtree + * @layer: layer number + * + * This function retrieves the first node of the given layer from the subtree + */ +static struct ice_sched_node * +ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent, + u8 layer) +{ + u8 i; + + if (layer < hw->sw_entry_point_layer) + return NULL; + for (i = 0; i < parent->num_children; i++) { + struct ice_sched_node *node = parent->children[i]; + + if (node) { + if (node->tx_sched_layer == layer) + return node; + /* this recursion is intentional, and wouldn't + * go more than 9 calls + */ + return ice_sched_get_first_node(hw, node, layer); + } + } + return NULL; +} + +/** + * ice_sched_get_tc_node - get pointer to TC node + * @pi: port information structure + * @tc: TC number + * + * This function returns the TC node pointer + */ +struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc) +{ + u8 i; + + if (!pi) + return NULL; + for (i = 0; i < pi->root->num_children; i++) + if (pi->root->children[i]->tc_num == tc) + return pi->root->children[i]; + return NULL; +} + +/** + * ice_free_sched_node - Free a Tx scheduler node from SW DB + * @pi: port information structure + * @node: pointer to the ice_sched_node struct + * + * This function frees up a node from SW DB as well as from HW + * + * This function needs to be called with the port_info->sched_lock held + */ +void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) +{ + struct ice_sched_node *parent; + struct ice_hw *hw = pi->hw; + u8 i, j; + + /* Free the children before freeing up the parent node + * The parent array is updated below and that shifts the nodes + * in the array. 
So always pick the first child if num children > 0 + */ + while (node->num_children) + ice_free_sched_node(pi, node->children[0]); + + /* Leaf, TC and root nodes can't be deleted by SW */ + if (node->tx_sched_layer >= hw->sw_entry_point_layer && + node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC && + node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT && + node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) { + u32 teid = le32_to_cpu(node->info.node_teid); + enum ice_status status; + + status = ice_sched_remove_elems(hw, node->parent, 1, &teid); + if (status) + ice_debug(hw, ICE_DBG_SCHED, + "remove element failed %d\n", status); + } + parent = node->parent; + /* root has no parent */ + if (parent) { + struct ice_sched_node *p, *tc_node; + + /* update the parent */ + for (i = 0; i < parent->num_children; i++) + if (parent->children[i] == node) { + for (j = i + 1; j < parent->num_children; j++) + parent->children[j - 1] = + parent->children[j]; + parent->num_children--; + break; + } + + /* search for previous sibling that points to this node and + * remove the reference + */ + tc_node = ice_sched_get_tc_node(pi, node->tc_num); + if (!tc_node) { + ice_debug(hw, ICE_DBG_SCHED, + "Invalid TC number %d\n", node->tc_num); + goto err_exit; + } + p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer); + while (p) { + if (p->sibling == node) { + p->sibling = node->sibling; + break; + } + p = p->sibling; + } + } +err_exit: + /* leaf nodes have no children */ + if (node->children) + devm_kfree(ice_hw_to_dev(hw), node->children); + devm_kfree(ice_hw_to_dev(hw), node); +} + +/** + * ice_aq_get_dflt_topo - gets default scheduler topology + * @hw: pointer to the hw struct + * @lport: logical port number + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @num_branches: returns total number of queue to port branches + * @cd: pointer to command details structure or NULL + * + * Get default scheduler topology (0x400) + */ +static enum ice_status +ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport, + struct ice_aqc_get_topo_elem *buf, u16 buf_size, + u8 *num_branches, struct ice_sq_cd *cd) +{ + struct ice_aqc_get_topo *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.get_topo; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo); + cmd->port_num = lport; + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status && num_branches) + *num_branches = cmd->num_branches; + + return status; +} + +/** + * ice_aq_add_sched_elems - adds scheduling element + * @hw: pointer to the hw struct + * @grps_req: the number of groups that are requested to be added + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @grps_added: returns total number of groups added + * @cd: pointer to command details structure or NULL + * + * Add scheduling elements (0x0401) + */ +static enum ice_status +ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req, + struct ice_aqc_add_elem *buf, u16 buf_size, + u16 *grps_added, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_move_delete_elem *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.add_move_delete_elem; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_sched_elems); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + cmd->num_grps_req = cpu_to_le16(grps_req); + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status && grps_added) + *grps_added = le16_to_cpu(cmd->num_grps_updated); + + return status; +} + +/** + * ice_suspend_resume_elems - 
suspend/resume scheduler elements + * @hw: pointer to the hw struct + * @elems_req: number of elements to suspend + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @elems_ret: returns total number of elements suspended + * @cd: pointer to command details structure or NULL + * @cmd_code: command code for suspend or resume + * + * suspend/resume scheduler elements + */ +static enum ice_status +ice_suspend_resume_elems(struct ice_hw *hw, u16 elems_req, + struct ice_aqc_suspend_resume_elem *buf, u16 buf_size, + u16 *elems_ret, struct ice_sq_cd *cd, + enum ice_adminq_opc cmd_code) +{ + struct ice_aqc_get_cfg_elem *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.get_update_elem; + ice_fill_dflt_direct_cmd_desc(&desc, cmd_code); + cmd->num_elem_req = cpu_to_le16(elems_req); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status && elems_ret) + *elems_ret = le16_to_cpu(cmd->num_elem_resp); + return status; +} + +/** + * ice_aq_suspend_sched_elems - suspend scheduler elements + * @hw: pointer to the hw struct + * @elems_req: number of elements to suspend + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @elems_ret: returns total number of elements suspended + * @cd: pointer to command details structure or NULL + * + * Suspend scheduling elements (0x0409) + */ +static enum ice_status +ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req, + struct ice_aqc_suspend_resume_elem *buf, + u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd) +{ + return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret, + cd, ice_aqc_opc_suspend_sched_elems); +} + +/** + * ice_aq_resume_sched_elems - resume scheduler elements + * @hw: pointer to the hw struct + * @elems_req: number of elements to resume + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @elems_ret: returns total number of elements resumed + * @cd: pointer to command details structure or NULL + * + * resume scheduling elements (0x040A) + */ +static enum ice_status +ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req, + struct ice_aqc_suspend_resume_elem *buf, + u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd) +{ + return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret, + cd, ice_aqc_opc_resume_sched_elems); +} + +/** + * ice_aq_query_sched_res - query scheduler resource + * @hw: pointer to the hw struct + * @buf_size: buffer size in bytes + * @buf: pointer to buffer + * @cd: pointer to command details structure or NULL + * + * Query scheduler resource allocation (0x0412) + */ +static enum ice_status +ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size, + struct ice_aqc_query_txsched_res_resp *buf, + struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res); + return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); +} + +/** + * ice_sched_suspend_resume_elems - suspend or resume hw nodes + * @hw: pointer to the hw struct + * @num_nodes: number of nodes + * @node_teids: array of node teids to be suspended or resumed + * @suspend: true means suspend / false means resume + * + * This function suspends or resumes hw nodes + */ +static enum ice_status +ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids, + bool suspend) +{ + struct ice_aqc_suspend_resume_elem *buf; + u16 i, buf_size, num_elem_ret = 0; + enum ice_status status; + + buf_size = sizeof(*buf) * num_nodes; + buf = 
devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + for (i = 0; i < num_nodes; i++) + buf->teid[i] = cpu_to_le32(node_teids[i]); + + if (suspend) + status = ice_aq_suspend_sched_elems(hw, num_nodes, buf, + buf_size, &num_elem_ret, + NULL); + else + status = ice_aq_resume_sched_elems(hw, num_nodes, buf, + buf_size, &num_elem_ret, + NULL); + if (status || num_elem_ret != num_nodes) + ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n"); + + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + +/** + * ice_sched_clear_tx_topo - clears the schduler tree nodes + * @pi: port information structure + * + * This function removes all the nodes from HW as well as from SW DB. + */ +static void ice_sched_clear_tx_topo(struct ice_port_info *pi) +{ + struct ice_sched_agg_info *agg_info; + struct ice_sched_vsi_info *vsi_elem; + struct ice_sched_agg_info *atmp; + struct ice_sched_vsi_info *tmp; + struct ice_hw *hw; + + if (!pi) + return; + + hw = pi->hw; + + list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) { + struct ice_sched_agg_vsi_info *agg_vsi_info; + struct ice_sched_agg_vsi_info *vtmp; + + list_for_each_entry_safe(agg_vsi_info, vtmp, + &agg_info->agg_vsi_list, list_entry) { + list_del(&agg_vsi_info->list_entry); + devm_kfree(ice_hw_to_dev(hw), agg_vsi_info); + } + } + + /* remove the vsi list */ + list_for_each_entry_safe(vsi_elem, tmp, &pi->vsi_info_list, + list_entry) { + list_del(&vsi_elem->list_entry); + devm_kfree(ice_hw_to_dev(hw), vsi_elem); + } + + if (pi->root) { + ice_free_sched_node(pi, pi->root); + pi->root = NULL; + } +} + +/** + * ice_sched_clear_port - clear the scheduler elements from SW DB for a port + * @pi: port information structure + * + * Cleanup scheduling elements from SW DB + */ +static void ice_sched_clear_port(struct ice_port_info *pi) +{ + if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) + return; + + pi->port_state = ICE_SCHED_PORT_STATE_INIT; + mutex_lock(&pi->sched_lock); + ice_sched_clear_tx_topo(pi); + mutex_unlock(&pi->sched_lock); + mutex_destroy(&pi->sched_lock); +} + +/** + * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports + * @hw: pointer to the hw struct + * + * Cleanup scheduling elements from SW DB for all the ports + */ +void ice_sched_cleanup_all(struct ice_hw *hw) +{ + if (!hw || !hw->port_info) + return; + + if (hw->layer_info) + devm_kfree(ice_hw_to_dev(hw), hw->layer_info); + + ice_sched_clear_port(hw->port_info); + + hw->num_tx_sched_layers = 0; + hw->num_tx_sched_phys_layers = 0; + hw->flattened_layers = 0; + hw->max_cgds = 0; +} + +/** + * ice_sched_create_vsi_info_entry - create an empty new VSI entry + * @pi: port information structure + * @vsi_id: VSI Id + * + * This function creates a new VSI entry and adds it to list + */ +static struct ice_sched_vsi_info * +ice_sched_create_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id) +{ + struct ice_sched_vsi_info *vsi_elem; + + if (!pi) + return NULL; + + vsi_elem = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*vsi_elem), + GFP_KERNEL); + if (!vsi_elem) + return NULL; + + list_add(&vsi_elem->list_entry, &pi->vsi_info_list); + vsi_elem->vsi_id = vsi_id; + return vsi_elem; +} + +/** + * ice_sched_add_elems - add nodes to hw and SW DB + * @pi: port information structure + * @tc_node: pointer to the branch node + * @parent: pointer to the parent node + * @layer: layer number to add nodes + * @num_nodes: number of nodes + * @num_nodes_added: pointer to num nodes added + * @first_node_teid: if new 
nodes are added then return the teid of first node + * + * This function add nodes to hw as well as to SW DB for a given layer + */ +static enum ice_status +ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, + struct ice_sched_node *parent, u8 layer, u16 num_nodes, + u16 *num_nodes_added, u32 *first_node_teid) +{ + struct ice_sched_node *prev, *new_node; + struct ice_aqc_add_elem *buf; + u16 i, num_groups_added = 0; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + u16 buf_size; + u32 teid; + + buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1); + buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + buf->hdr.parent_teid = parent->info.node_teid; + buf->hdr.num_elems = cpu_to_le16(num_nodes); + for (i = 0; i < num_nodes; i++) { + buf->generic[i].parent_teid = parent->info.node_teid; + buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC; + buf->generic[i].data.valid_sections = + ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR | + ICE_AQC_ELEM_VALID_EIR; + buf->generic[i].data.generic = 0; + buf->generic[i].data.cir_bw.bw_profile_idx = + ICE_SCHED_DFLT_RL_PROF_ID; + buf->generic[i].data.eir_bw.bw_profile_idx = + ICE_SCHED_DFLT_RL_PROF_ID; + } + + status = ice_aq_add_sched_elems(hw, 1, buf, buf_size, + &num_groups_added, NULL); + if (status || num_groups_added != 1) { + ice_debug(hw, ICE_DBG_SCHED, "add elements failed\n"); + devm_kfree(ice_hw_to_dev(hw), buf); + return ICE_ERR_CFG; + } + + *num_nodes_added = num_nodes; + /* add nodes to the SW DB */ + for (i = 0; i < num_nodes; i++) { + status = ice_sched_add_node(pi, layer, &buf->generic[i]); + if (status) { + ice_debug(hw, ICE_DBG_SCHED, + "add nodes in SW DB failed status =%d\n", + status); + break; + } + + teid = le32_to_cpu(buf->generic[i].node_teid); + new_node = ice_sched_find_node_by_teid(parent, teid); + + if (!new_node) { + ice_debug(hw, ICE_DBG_SCHED, + "Node is missing for teid =%d\n", teid); + break; + } + + new_node->sibling = NULL; + new_node->tc_num = tc_node->tc_num; + + /* add it to previous node sibling pointer */ + /* Note: siblings are not linked across branches */ + prev = ice_sched_get_first_node(hw, tc_node, layer); + + if (prev && prev != new_node) { + while (prev->sibling) + prev = prev->sibling; + prev->sibling = new_node; + } + + if (i == 0) + *first_node_teid = teid; + } + + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + +/** + * ice_sched_add_nodes_to_layer - Add nodes to a given layer + * @pi: port information structure + * @tc_node: pointer to TC node + * @parent: pointer to parent node + * @layer: layer number to add nodes + * @num_nodes: number of nodes to be added + * @first_node_teid: pointer to the first node teid + * @num_nodes_added: pointer to number of nodes added + * + * This function add nodes to a given layer. 
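+ * If the requested nodes do not all fit under the given parent, the
+ * remainder is added under the parent's sibling at the same layer.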
+ */ +static enum ice_status +ice_sched_add_nodes_to_layer(struct ice_port_info *pi, + struct ice_sched_node *tc_node, + struct ice_sched_node *parent, u8 layer, + u16 num_nodes, u32 *first_node_teid, + u16 *num_nodes_added) +{ + u32 *first_teid_ptr = first_node_teid; + u16 new_num_nodes, max_child_nodes; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + u16 num_added = 0; + u32 temp; + + *num_nodes_added = 0; + + if (!num_nodes) + return status; + + if (!parent || layer < hw->sw_entry_point_layer) + return ICE_ERR_PARAM; + + /* max children per node per layer */ + max_child_nodes = + le16_to_cpu(hw->layer_info[parent->tx_sched_layer].max_children); + + /* current number of children + required nodes exceed max children ? */ + if ((parent->num_children + num_nodes) > max_child_nodes) { + /* Fail if the parent is a TC node */ + if (parent == tc_node) + return ICE_ERR_CFG; + + /* utilize all the spaces if the parent is not full */ + if (parent->num_children < max_child_nodes) { + new_num_nodes = max_child_nodes - parent->num_children; + /* this recursion is intentional, and wouldn't + * go more than 2 calls + */ + status = ice_sched_add_nodes_to_layer(pi, tc_node, + parent, layer, + new_num_nodes, + first_node_teid, + &num_added); + if (status) + return status; + + *num_nodes_added += num_added; + } + /* Don't modify the first node teid memory if the first node was + * added already in the above call. Instead send some temp + * memory for all other recursive calls. + */ + if (num_added) + first_teid_ptr = &temp; + + new_num_nodes = num_nodes - num_added; + + /* This parent is full, try the next sibling */ + parent = parent->sibling; + + /* this recursion is intentional, for 1024 queues + * per VSI, it goes max of 16 iterations. + * 1024 / 8 = 128 layer 8 nodes + * 128 /8 = 16 (add 8 nodes per iteration) + */ + status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, + layer, new_num_nodes, + first_teid_ptr, + &num_added); + *num_nodes_added += num_added; + return status; + } + + status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes, + num_nodes_added, first_node_teid); + return status; +} + +/** + * ice_sched_get_qgrp_layer - get the current queue group layer number + * @hw: pointer to the hw struct + * + * This function returns the current queue group layer number + */ +static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw) +{ + /* It's always total layers - 1, the array is 0 relative so -2 */ + return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET; +} + +/** + * ice_sched_get_vsi_layer - get the current VSI layer number + * @hw: pointer to the hw struct + * + * This function returns the current VSI layer number + */ +static u8 ice_sched_get_vsi_layer(struct ice_hw *hw) +{ + /* Num Layers VSI layer + * 9 6 + * 7 4 + * 5 or less sw_entry_point_layer + */ + /* calculate the vsi layer based on number of layers. 
*/ + if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) { + u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET; + + if (layer > hw->sw_entry_point_layer) + return layer; + } + return hw->sw_entry_point_layer; +} + +/** + * ice_sched_get_num_nodes_per_layer - Get the total number of nodes per layer + * @pi: pointer to the port info struct + * @layer: layer number + * + * This function calculates the number of nodes present in the scheduler tree + * including all the branches for a given layer + */ +static u16 +ice_sched_get_num_nodes_per_layer(struct ice_port_info *pi, u8 layer) +{ + struct ice_hw *hw; + u16 num_nodes = 0; + u8 i; + + if (!pi) + return num_nodes; + + hw = pi->hw; + + /* Calculate the number of nodes for all TCs */ + for (i = 0; i < pi->root->num_children; i++) { + struct ice_sched_node *tc_node, *node; + + tc_node = pi->root->children[i]; + + /* Get the first node */ + node = ice_sched_get_first_node(hw, tc_node, layer); + if (!node) + continue; + + /* count the siblings */ + while (node) { + num_nodes++; + node = node->sibling; + } + } + + return num_nodes; +} + +/** + * ice_sched_val_max_nodes - check max number of nodes reached or not + * @pi: port information structure + * @new_num_nodes_per_layer: pointer to the new number of nodes array + * + * This function checks whether the scheduler tree layers have enough space to + * add new nodes + */ +static enum ice_status +ice_sched_validate_for_max_nodes(struct ice_port_info *pi, + u16 *new_num_nodes_per_layer) +{ + struct ice_hw *hw = pi->hw; + u8 i, qg_layer; + u16 num_nodes; + + qg_layer = ice_sched_get_qgrp_layer(hw); + + /* walk through all the layers from SW entry point to qgroup layer */ + for (i = hw->sw_entry_point_layer; i <= qg_layer; i++) { + num_nodes = ice_sched_get_num_nodes_per_layer(pi, i); + if (num_nodes + new_num_nodes_per_layer[i] > + le16_to_cpu(hw->layer_info[i].max_pf_nodes)) { + ice_debug(hw, ICE_DBG_SCHED, + "max nodes reached for layer = %d\n", i); + return ICE_ERR_CFG; + } + } + return 0; +} + +/** + * ice_rm_dflt_leaf_node - remove the default leaf node in the tree + * @pi: port information structure + * + * This function removes the leaf node that was created by the FW + * during initialization + */ +static void +ice_rm_dflt_leaf_node(struct ice_port_info *pi) +{ + struct ice_sched_node *node; + + node = pi->root; + while (node) { + if (!node->num_children) + break; + node = node->children[0]; + } + if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) { + u32 teid = le32_to_cpu(node->info.node_teid); + enum ice_status status; + + /* remove the default leaf node */ + status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid); + if (!status) + ice_free_sched_node(pi, node); + } +} + +/** + * ice_sched_rm_dflt_nodes - free the default nodes in the tree + * @pi: port information structure + * + * This function frees all the nodes except root and TC that were created by + * the FW during initialization + */ +static void +ice_sched_rm_dflt_nodes(struct ice_port_info *pi) +{ + struct ice_sched_node *node; + + ice_rm_dflt_leaf_node(pi); + + /* remove the default nodes except TC and root nodes */ + node = pi->root; + while (node) { + if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer && + node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC && + node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) { + ice_free_sched_node(pi, node); + break; + } + + if (!node->num_children) + break; + node = node->children[0]; + } +} + +/** + * ice_sched_init_port - Initialize 
scheduler by querying information from FW + * @pi: port info structure for the tree to cleanup + * + * This function is the initial call to find the total number of Tx scheduler + * resources, default topology created by firmware and storing the information + * in SW DB. + */ +enum ice_status ice_sched_init_port(struct ice_port_info *pi) +{ + struct ice_aqc_get_topo_elem *buf; + enum ice_status status; + struct ice_hw *hw; + u8 num_branches; + u16 num_elems; + u8 i, j; + + if (!pi) + return ICE_ERR_PARAM; + hw = pi->hw; + + /* Query the Default Topology from FW */ + buf = devm_kcalloc(ice_hw_to_dev(hw), ICE_TXSCHED_MAX_BRANCHES, + sizeof(*buf), GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + /* Query default scheduling tree topology */ + status = ice_aq_get_dflt_topo(hw, pi->lport, buf, + sizeof(*buf) * ICE_TXSCHED_MAX_BRANCHES, + &num_branches, NULL); + if (status) + goto err_init_port; + + /* num_branches should be between 1-8 */ + if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) { + ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n", + num_branches); + status = ICE_ERR_PARAM; + goto err_init_port; + } + + /* get the number of elements on the default/first branch */ + num_elems = le16_to_cpu(buf[0].hdr.num_elems); + + /* num_elems should always be between 1-9 */ + if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) { + ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n", + num_elems); + status = ICE_ERR_PARAM; + goto err_init_port; + } + + /* If the last node is a leaf node then the index of the Q group + * layer is two less than the number of elements. + */ + if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type == + ICE_AQC_ELEM_TYPE_LEAF) + pi->last_node_teid = + le32_to_cpu(buf[0].generic[num_elems - 2].node_teid); + else + pi->last_node_teid = + le32_to_cpu(buf[0].generic[num_elems - 1].node_teid); + + /* Insert the Tx Sched root node */ + status = ice_sched_add_root_node(pi, &buf[0].generic[0]); + if (status) + goto err_init_port; + + /* Parse the default tree and cache the information */ + for (i = 0; i < num_branches; i++) { + num_elems = le16_to_cpu(buf[i].hdr.num_elems); + + /* Skip root element as already inserted */ + for (j = 1; j < num_elems; j++) { + /* update the sw entry point */ + if (buf[0].generic[j].data.elem_type == + ICE_AQC_ELEM_TYPE_ENTRY_POINT) + hw->sw_entry_point_layer = j; + + status = ice_sched_add_node(pi, j, &buf[i].generic[j]); + if (status) + goto err_init_port; + } + } + + /* Remove the default nodes. 
*/ + if (pi->root) + ice_sched_rm_dflt_nodes(pi); + + /* initialize the port for handling the scheduler tree */ + pi->port_state = ICE_SCHED_PORT_STATE_READY; + mutex_init(&pi->sched_lock); + INIT_LIST_HEAD(&pi->agg_list); + INIT_LIST_HEAD(&pi->vsi_info_list); + +err_init_port: + if (status && pi->root) { + ice_free_sched_node(pi, pi->root); + pi->root = NULL; + } + + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + +/** + * ice_sched_query_res_alloc - query the FW for num of logical sched layers + * @hw: pointer to the HW struct + * + * query FW for allocated scheduler resources and store in HW struct + */ +enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw) +{ + struct ice_aqc_query_txsched_res_resp *buf; + enum ice_status status = 0; + + if (hw->layer_info) + return status; + + buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL); + if (status) + goto sched_query_out; + + hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels); + hw->num_tx_sched_phys_layers = + le16_to_cpu(buf->sched_props.phys_levels); + hw->flattened_layers = buf->sched_props.flattening_bitmap; + hw->max_cgds = buf->sched_props.max_pf_cgds; + + hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props, + (hw->num_tx_sched_layers * + sizeof(*hw->layer_info)), + GFP_KERNEL); + if (!hw->layer_info) { + status = ICE_ERR_NO_MEMORY; + goto sched_query_out; + } + +sched_query_out: + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + +/** + * ice_sched_get_vsi_info_entry - Get the vsi entry list for given vsi_id + * @pi: port information structure + * @vsi_id: vsi id + * + * This function retrieves the vsi list for the given vsi id + */ +static struct ice_sched_vsi_info * +ice_sched_get_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id) +{ + struct ice_sched_vsi_info *list_elem; + + if (!pi) + return NULL; + + list_for_each_entry(list_elem, &pi->vsi_info_list, list_entry) + if (list_elem->vsi_id == vsi_id) + return list_elem; + return NULL; +} + +/** + * ice_sched_find_node_in_subtree - Find node in part of base node subtree + * @hw: pointer to the hw struct + * @base: pointer to the base node + * @node: pointer to the node to search + * + * This function checks whether a given node is part of the base node + * subtree or not + */ +static bool +ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base, + struct ice_sched_node *node) +{ + u8 i; + + for (i = 0; i < base->num_children; i++) { + struct ice_sched_node *child = base->children[i]; + + if (node == child) + return true; + + if (child->tx_sched_layer > node->tx_sched_layer) + return false; + + /* this recursion is intentional, and wouldn't + * go more than 8 calls + */ + if (ice_sched_find_node_in_subtree(hw, child, node)) + return true; + } + return false; +} + +/** + * ice_sched_get_free_qparent - Get a free lan or rdma q group node + * @pi: port information structure + * @vsi_id: vsi id + * @tc: branch number + * @owner: lan or rdma + * + * This function retrieves a free lan or rdma q group node + */ +struct ice_sched_node * +ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc, + u8 owner) +{ + struct ice_sched_node *vsi_node, *qgrp_node = NULL; + struct ice_sched_vsi_info *list_elem; + u16 max_children; + u8 qgrp_layer; + + qgrp_layer = ice_sched_get_qgrp_layer(pi->hw); + max_children = le16_to_cpu(pi->hw->layer_info[qgrp_layer].max_children); + + list_elem = 
ice_sched_get_vsi_info_entry(pi, vsi_id); + if (!list_elem) + goto lan_q_exit; + + vsi_node = list_elem->vsi_node[tc]; + + /* validate invalid VSI id */ + if (!vsi_node) + goto lan_q_exit; + + /* get the first q group node from VSI sub-tree */ + qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer); + while (qgrp_node) { + /* make sure the qgroup node is part of the VSI subtree */ + if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node)) + if (qgrp_node->num_children < max_children && + qgrp_node->owner == owner) + break; + qgrp_node = qgrp_node->sibling; + } + +lan_q_exit: + return qgrp_node; +} + +/** + * ice_sched_get_vsi_node - Get a VSI node based on VSI id + * @hw: pointer to the hw struct + * @tc_node: pointer to the TC node + * @vsi_id: VSI id + * + * This function retrieves a VSI node for a given VSI id from a given + * TC branch + */ +static struct ice_sched_node * +ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node, + u16 vsi_id) +{ + struct ice_sched_node *node; + u8 vsi_layer; + + vsi_layer = ice_sched_get_vsi_layer(hw); + node = ice_sched_get_first_node(hw, tc_node, vsi_layer); + + /* Check whether it already exists */ + while (node) { + if (node->vsi_id == vsi_id) + return node; + node = node->sibling; + } + + return node; +} + +/** + * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes + * @hw: pointer to the hw struct + * @num_qs: number of queues + * @num_nodes: num nodes array + * + * This function calculates the number of VSI child nodes based on the + * number of queues. + */ +static void +ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes) +{ + u16 num = num_qs; + u8 i, qgl, vsil; + + qgl = ice_sched_get_qgrp_layer(hw); + vsil = ice_sched_get_vsi_layer(hw); + + /* calculate num nodes from q group to VSI layer */ + for (i = qgl; i > vsil; i--) { + u16 max_children = le16_to_cpu(hw->layer_info[i].max_children); + + /* round to the next integer if there is a remainder */ + num = DIV_ROUND_UP(num, max_children); + + /* need at least one node */ + num_nodes[i] = num ? num : 1; + } +} + +/** + * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree + * @pi: port information structure + * @vsi_id: VSI id + * @tc_node: pointer to the TC node + * @num_nodes: pointer to the num nodes that needs to be added per layer + * @owner: node owner (lan or rdma) + * + * This function adds the VSI child nodes to tree. It gets called for + * lan and rdma separately. 
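+ * Nodes are added layer by layer from just below the VSI layer down to the
+ * queue group layer, and each newly added node is marked with the given owner.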
+ */ +static enum ice_status +ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, + struct ice_sched_node *tc_node, u16 *num_nodes, + u8 owner) +{ + struct ice_sched_node *parent, *node; + struct ice_hw *hw = pi->hw; + enum ice_status status; + u32 first_node_teid; + u16 num_added = 0; + u8 i, qgl, vsil; + + status = ice_sched_validate_for_max_nodes(pi, num_nodes); + if (status) + return status; + + qgl = ice_sched_get_qgrp_layer(hw); + vsil = ice_sched_get_vsi_layer(hw); + parent = ice_sched_get_vsi_node(hw, tc_node, vsi_id); + for (i = vsil + 1; i <= qgl; i++) { + if (!parent) + return ICE_ERR_CFG; + status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i, + num_nodes[i], + &first_node_teid, + &num_added); + if (status || num_nodes[i] != num_added) + return ICE_ERR_CFG; + + /* The newly added node can be a new parent for the next + * layer nodes + */ + if (num_added) { + parent = ice_sched_find_node_by_teid(tc_node, + first_node_teid); + node = parent; + while (node) { + node->owner = owner; + node = node->sibling; + } + } else { + parent = parent->children[0]; + } + } + + return 0; +} + +/** + * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree + * @pi: port information structure + * @vsi_node: pointer to the VSI node + * @num_nodes: pointer to the num nodes that needs to be removed per layer + * @owner: node owner (lan or rdma) + * + * This function removes the VSI child nodes from the tree. It gets called for + * lan and rdma separately. + */ +static void +ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi, + struct ice_sched_node *vsi_node, u16 *num_nodes, + u8 owner) +{ + struct ice_sched_node *node, *next; + u8 i, qgl, vsil; + u16 num; + + qgl = ice_sched_get_qgrp_layer(pi->hw); + vsil = ice_sched_get_vsi_layer(pi->hw); + + for (i = qgl; i > vsil; i--) { + num = num_nodes[i]; + node = ice_sched_get_first_node(pi->hw, vsi_node, i); + while (node && num) { + next = node->sibling; + if (node->owner == owner && !node->num_children) { + ice_free_sched_node(pi, node); + num--; + } + node = next; + } + } +} + +/** + * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes + * @hw: pointer to the hw struct + * @tc_node: pointer to TC node + * @num_nodes: pointer to num nodes array + * + * This function calculates the number of supported nodes needed to add this + * VSI into tx tree including the VSI, parent and intermediate nodes in below + * layers + */ +static void +ice_sched_calc_vsi_support_nodes(struct ice_hw *hw, + struct ice_sched_node *tc_node, u16 *num_nodes) +{ + struct ice_sched_node *node; + u16 max_child; + u8 i, vsil; + + vsil = ice_sched_get_vsi_layer(hw); + for (i = vsil; i >= hw->sw_entry_point_layer; i--) + /* Add intermediate nodes if TC has no children and + * need at least one node for VSI + */ + if (!tc_node->num_children || i == vsil) { + num_nodes[i]++; + } else { + /* If intermediate nodes are reached max children + * then add a new one. 
+ */ + node = ice_sched_get_first_node(hw, tc_node, i); + max_child = le16_to_cpu(hw->layer_info[i].max_children); + + /* scan all the siblings */ + while (node) { + if (node->num_children < max_child) + break; + node = node->sibling; + } + + /* all the nodes are full, allocate a new one */ + if (!node) + num_nodes[i]++; + } +} + +/** + * ice_sched_add_vsi_support_nodes - add VSI supported nodes into tx tree + * @pi: port information structure + * @vsi_id: VSI Id + * @tc_node: pointer to TC node + * @num_nodes: pointer to num nodes array + * + * This function adds the VSI supported nodes into tx tree including the + * VSI, its parent and intermediate nodes in below layers + */ +static enum ice_status +ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_id, + struct ice_sched_node *tc_node, u16 *num_nodes) +{ + struct ice_sched_node *parent = tc_node; + enum ice_status status; + u32 first_node_teid; + u16 num_added = 0; + u8 i, vsil; + + if (!pi) + return ICE_ERR_PARAM; + + status = ice_sched_validate_for_max_nodes(pi, num_nodes); + if (status) + return status; + + vsil = ice_sched_get_vsi_layer(pi->hw); + for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) { + status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, + i, num_nodes[i], + &first_node_teid, + &num_added); + if (status || num_nodes[i] != num_added) + return ICE_ERR_CFG; + + /* The newly added node can be a new parent for the next + * layer nodes + */ + if (num_added) + parent = ice_sched_find_node_by_teid(tc_node, + first_node_teid); + else + parent = parent->children[0]; + + if (!parent) + return ICE_ERR_CFG; + + if (i == vsil) + parent->vsi_id = vsi_id; + } + return 0; +} + +/** + * ice_sched_add_vsi_to_topo - add a new VSI into tree + * @pi: port information structure + * @vsi_id: VSI Id + * @tc: TC number + * + * This function adds a new VSI into scheduler tree + */ +static enum ice_status +ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_id, u8 tc) +{ + u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; + struct ice_sched_node *tc_node; + struct ice_hw *hw = pi->hw; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (!tc_node) + return ICE_ERR_PARAM; + + /* calculate number of supported nodes needed for this VSI */ + ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes); + + /* add vsi supported nodes to tc subtree */ + return ice_sched_add_vsi_support_nodes(pi, vsi_id, tc_node, num_nodes); +} + +/** + * ice_sched_update_vsi_child_nodes - update VSI child nodes + * @pi: port information structure + * @vsi_id: VSI Id + * @tc: TC number + * @new_numqs: new number of max queues + * @owner: owner of this subtree + * + * This function updates the VSI child nodes based on the number of queues + */ +static enum ice_status +ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc, + u16 new_numqs, u8 owner) +{ + u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; + u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; + struct ice_sched_node *vsi_node; + struct ice_sched_node *tc_node; + struct ice_sched_vsi_info *vsi; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + u16 prev_numqs; + u8 i; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (!tc_node) + return ICE_ERR_CFG; + + vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id); + if (!vsi_node) + return ICE_ERR_CFG; + + vsi = ice_sched_get_vsi_info_entry(pi, vsi_id); + if (!vsi) + return ICE_ERR_CFG; + + if (owner == ICE_SCHED_NODE_OWNER_LAN) + prev_numqs = vsi->max_lanq[tc]; + else + return ICE_ERR_PARAM; + 
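+ /* For example, assuming eight children per queue group node, growing a
+ * VSI from 8 to 16 LAN queues raises the queue group layer requirement
+ * from DIV_ROUND_UP(8, 8) = 1 node to DIV_ROUND_UP(16, 8) = 2 nodes, so
+ * one node is added below this VSI; shrinking back to 8 removes it again.
+ */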
+ /* num queues are not changed */ + if (prev_numqs == new_numqs) + return status; + + /* calculate number of nodes based on prev/new number of qs */ + if (prev_numqs) + ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes); + + if (new_numqs) + ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes); + + if (prev_numqs > new_numqs) { + for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++) + new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i]; + + ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes, + owner); + } else { + for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++) + new_num_nodes[i] -= prev_num_nodes[i]; + + status = ice_sched_add_vsi_child_nodes(pi, vsi_id, tc_node, + new_num_nodes, owner); + if (status) + return status; + } + + vsi->max_lanq[tc] = new_numqs; + + return status; +} + +/** + * ice_sched_cfg_vsi - configure the new/exisiting VSI + * @pi: port information structure + * @vsi_id: VSI Id + * @tc: TC number + * @maxqs: max number of queues + * @owner: lan or rdma + * @enable: TC enabled or disabled + * + * This function adds/updates VSI nodes based on the number of queues. If TC is + * enabled and VSI is in suspended state then resume the VSI back. If TC is + * disabled then suspend the VSI if it is not already. + */ +enum ice_status +ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs, + u8 owner, bool enable) +{ + struct ice_sched_node *vsi_node, *tc_node; + struct ice_sched_vsi_info *vsi; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (!tc_node) + return ICE_ERR_PARAM; + + vsi = ice_sched_get_vsi_info_entry(pi, vsi_id); + if (!vsi) + vsi = ice_sched_create_vsi_info_entry(pi, vsi_id); + if (!vsi) + return ICE_ERR_NO_MEMORY; + + vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id); + + /* suspend the VSI if tc is not enabled */ + if (!enable) { + if (vsi_node && vsi_node->in_use) { + u32 teid = le32_to_cpu(vsi_node->info.node_teid); + + status = ice_sched_suspend_resume_elems(hw, 1, &teid, + true); + if (!status) + vsi_node->in_use = false; + } + return status; + } + + /* TC is enabled, if it is a new VSI then add it to the tree */ + if (!vsi_node) { + status = ice_sched_add_vsi_to_topo(pi, vsi_id, tc); + if (status) + return status; + vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id); + if (!vsi_node) + return ICE_ERR_CFG; + vsi->vsi_node[tc] = vsi_node; + vsi_node->in_use = true; + } + + /* update the VSI child nodes */ + status = ice_sched_update_vsi_child_nodes(pi, vsi_id, tc, maxqs, owner); + if (status) + return status; + + /* TC is enabled, resume the VSI if it is in the suspend state */ + if (!vsi_node->in_use) { + u32 teid = le32_to_cpu(vsi_node->info.node_teid); + + status = ice_sched_suspend_resume_elems(hw, 1, &teid, false); + if (!status) + vsi_node->in_use = true; + } + + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h new file mode 100644 index 000000000..badadcc12 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. 
*/ + +#ifndef _ICE_SCHED_H_ +#define _ICE_SCHED_H_ + +#include "ice_common.h" + +#define ICE_QGRP_LAYER_OFFSET 2 +#define ICE_VSI_LAYER_OFFSET 4 + +struct ice_sched_agg_vsi_info { + struct list_head list_entry; + DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS); + u16 vsi_id; +}; + +struct ice_sched_agg_info { + struct list_head agg_vsi_list; + struct list_head list_entry; + DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS); + u32 agg_id; + enum ice_agg_type agg_type; +}; + +/* FW AQ command calls */ +enum ice_status ice_sched_init_port(struct ice_port_info *pi); +enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw); +void ice_sched_cleanup_all(struct ice_hw *hw); +struct ice_sched_node * +ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid); +enum ice_status +ice_sched_add_node(struct ice_port_info *pi, u8 layer, + struct ice_aqc_txsched_elem_data *info); +void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node); +struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc); +struct ice_sched_node * +ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc, + u8 owner); +enum ice_status +ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs, + u8 owner, bool enable); +#endif /* _ICE_SCHED_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h new file mode 100644 index 000000000..d2dae913d --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_STATUS_H_ +#define _ICE_STATUS_H_ + +/* Error Codes */ +enum ice_status { + ICE_ERR_PARAM = -1, + ICE_ERR_NOT_IMPL = -2, + ICE_ERR_NOT_READY = -3, + ICE_ERR_BAD_PTR = -5, + ICE_ERR_INVAL_SIZE = -6, + ICE_ERR_DEVICE_NOT_SUPPORTED = -8, + ICE_ERR_RESET_FAILED = -9, + ICE_ERR_FW_API_VER = -10, + ICE_ERR_NO_MEMORY = -11, + ICE_ERR_CFG = -12, + ICE_ERR_OUT_OF_RANGE = -13, + ICE_ERR_ALREADY_EXISTS = -14, + ICE_ERR_DOES_NOT_EXIST = -15, + ICE_ERR_MAX_LIMIT = -17, + ICE_ERR_RESET_ONGOING = -18, + ICE_ERR_BUF_TOO_SHORT = -52, + ICE_ERR_NVM_BLANK_MODE = -53, + ICE_ERR_AQ_ERROR = -100, + ICE_ERR_AQ_TIMEOUT = -101, + ICE_ERR_AQ_FULL = -102, + ICE_ERR_AQ_NO_WORK = -103, + ICE_ERR_AQ_EMPTY = -104, +}; + +#endif /* _ICE_STATUS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c new file mode 100644 index 000000000..1bfc59dff --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -0,0 +1,1892 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_switch.h" + +#define ICE_ETH_DA_OFFSET 0 +#define ICE_ETH_ETHTYPE_OFFSET 12 +#define ICE_ETH_VLAN_TCI_OFFSET 14 +#define ICE_MAX_VLAN_ID 0xFFF + +/* Dummy ethernet header needed in the ice_aqc_sw_rules_elem + * struct to configure any switch filter rules. 
+ * {DA (6 bytes), SA(6 bytes), + * Ether type (2 bytes for header without VLAN tag) OR + * VLAN tag (4 bytes for header with VLAN tag) } + * + * Word on Hardcoded values + * byte 0 = 0x2: to identify it as locally administered DA MAC + * byte 6 = 0x2: to identify it as locally administered SA MAC + * byte 12 = 0x81 & byte 13 = 0x00: + * In case of VLAN filter first two bytes defines ether type (0x8100) + * and remaining two bytes are placeholder for programming a given VLAN id + * In case of Ether type filter it is treated as header without VLAN tag + * and byte 12 and 13 is used to program a given Ether type instead + */ +#define DUMMY_ETH_HDR_LEN 16 +static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0, + 0x2, 0, 0, 0, 0, 0, + 0x81, 0, 0, 0}; + +#define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE \ + (sizeof(struct ice_aqc_sw_rules_elem) - \ + sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \ + sizeof(struct ice_sw_rule_lkup_rx_tx) + DUMMY_ETH_HDR_LEN - 1) +#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE \ + (sizeof(struct ice_aqc_sw_rules_elem) - \ + sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \ + sizeof(struct ice_sw_rule_lkup_rx_tx) - 1) +#define ICE_SW_RULE_LG_ACT_SIZE(n) \ + (sizeof(struct ice_aqc_sw_rules_elem) - \ + sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \ + sizeof(struct ice_sw_rule_lg_act) - \ + sizeof(((struct ice_sw_rule_lg_act *)0)->act) + \ + ((n) * sizeof(((struct ice_sw_rule_lg_act *)0)->act))) +#define ICE_SW_RULE_VSI_LIST_SIZE(n) \ + (sizeof(struct ice_aqc_sw_rules_elem) - \ + sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \ + sizeof(struct ice_sw_rule_vsi_list) - \ + sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi) + \ + ((n) * sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi))) + +/** + * ice_aq_alloc_free_res - command to allocate/free resources + * @hw: pointer to the hw struct + * @num_entries: number of resource entries in buffer + * @buf: Indirect buffer to hold data parameters and response + * @buf_size: size of buffer for indirect commands + * @opc: pass in the command opcode + * @cd: pointer to command details structure or NULL + * + * Helper function to allocate/free resources using the admin queue commands + */ +static enum ice_status +ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, + struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size, + enum ice_adminq_opc opc, struct ice_sq_cd *cd) +{ + struct ice_aqc_alloc_free_res_cmd *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.sw_res_ctrl; + + if (!buf) + return ICE_ERR_PARAM; + + if (buf_size < (num_entries * sizeof(buf->elem[0]))) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, opc); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + cmd->num_entries = cpu_to_le16(num_entries); + + return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); +} + +/** + * ice_aq_get_sw_cfg - get switch configuration + * @hw: pointer to the hardware structure + * @buf: pointer to the result buffer + * @buf_size: length of the buffer available for response + * @req_desc: pointer to requested descriptor + * @num_elems: pointer to number of elements + * @cd: pointer to command details structure or NULL + * + * Get switch configuration (0x0200) to be placed in 'buff'. + * This admin command returns information such as initial VSI/port number + * and switch ID it belongs to. + * + * NOTE: *req_desc is both an input/output parameter. + * The caller of this function first calls this function with *request_desc set + * to 0. 
If the response from f/w has *req_desc set to 0, all the switch + * configuration information has been returned; if non-zero (meaning not all + * the information was returned), the caller should call this function again + * with *req_desc set to the previous value returned by f/w to get the + * next block of switch configuration information. + * + * *num_elems is output only parameter. This reflects the number of elements + * in response buffer. The caller of this function to use *num_elems while + * parsing the response buffer. + */ +static enum ice_status +ice_aq_get_sw_cfg(struct ice_hw *hw, struct ice_aqc_get_sw_cfg_resp *buf, + u16 buf_size, u16 *req_desc, u16 *num_elems, + struct ice_sq_cd *cd) +{ + struct ice_aqc_get_sw_cfg *cmd; + enum ice_status status; + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sw_cfg); + cmd = &desc.params.get_sw_conf; + cmd->element = cpu_to_le16(*req_desc); + + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status) { + *req_desc = le16_to_cpu(cmd->element); + *num_elems = le16_to_cpu(cmd->num_elems); + } + + return status; +} + +/** + * ice_aq_add_vsi + * @hw: pointer to the hw struct + * @vsi_ctx: pointer to a VSI context struct + * @cd: pointer to command details structure or NULL + * + * Add a VSI context to the hardware (0x0210) + */ +enum ice_status +ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + struct ice_sq_cd *cd) +{ + struct ice_aqc_add_update_free_vsi_resp *res; + struct ice_aqc_add_get_update_free_vsi *cmd; + enum ice_status status; + struct ice_aq_desc desc; + + cmd = &desc.params.vsi_cmd; + res = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_vsi); + + if (!vsi_ctx->alloc_from_pool) + cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | + ICE_AQ_VSI_IS_VALID); + + cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), cd); + + if (!status) { + vsi_ctx->vsi_num = le16_to_cpu(res->vsi_num) & ICE_AQ_VSI_NUM_M; + vsi_ctx->vsis_allocd = le16_to_cpu(res->vsi_used); + vsi_ctx->vsis_unallocated = le16_to_cpu(res->vsi_free); + } + + return status; +} + +/** + * ice_aq_update_vsi + * @hw: pointer to the hw struct + * @vsi_ctx: pointer to a VSI context struct + * @cd: pointer to command details structure or NULL + * + * Update VSI context in the hardware (0x0211) + */ +enum ice_status +ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + struct ice_sq_cd *cd) +{ + struct ice_aqc_add_update_free_vsi_resp *resp; + struct ice_aqc_add_get_update_free_vsi *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.vsi_cmd; + resp = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_vsi); + + cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), cd); + + if (!status) { + vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used); + vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free); + } + + return status; +} + +/** + * ice_aq_free_vsi + * @hw: pointer to the hw struct + * @vsi_ctx: pointer to a VSI context struct + * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources + * @cd: pointer to command details structure or NULL + * + * Get VSI context info 
from hardware (0x0213) + */ +enum ice_status +ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + bool keep_vsi_alloc, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_update_free_vsi_resp *resp; + struct ice_aqc_add_get_update_free_vsi *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.vsi_cmd; + resp = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_free_vsi); + + cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID); + if (keep_vsi_alloc) + cmd->cmd_flags = cpu_to_le16(ICE_AQ_VSI_KEEP_ALLOC); + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (!status) { + vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used); + vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free); + } + + return status; +} + +/** + * ice_aq_alloc_free_vsi_list + * @hw: pointer to the hw struct + * @vsi_list_id: VSI list id returned or used for lookup + * @lkup_type: switch rule filter lookup type + * @opc: switch rules population command type - pass in the command opcode + * + * allocates or free a VSI list resource + */ +static enum ice_status +ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, + enum ice_sw_lkup_type lkup_type, + enum ice_adminq_opc opc) +{ + struct ice_aqc_alloc_free_res_elem *sw_buf; + struct ice_aqc_res_elem *vsi_ele; + enum ice_status status; + u16 buf_len; + + buf_len = sizeof(*sw_buf); + sw_buf = devm_kzalloc(ice_hw_to_dev(hw), buf_len, GFP_KERNEL); + if (!sw_buf) + return ICE_ERR_NO_MEMORY; + sw_buf->num_elems = cpu_to_le16(1); + + if (lkup_type == ICE_SW_LKUP_MAC || + lkup_type == ICE_SW_LKUP_MAC_VLAN || + lkup_type == ICE_SW_LKUP_ETHERTYPE || + lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || + lkup_type == ICE_SW_LKUP_PROMISC || + lkup_type == ICE_SW_LKUP_PROMISC_VLAN) { + sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_REP); + } else if (lkup_type == ICE_SW_LKUP_VLAN) { + sw_buf->res_type = + cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE); + } else { + status = ICE_ERR_PARAM; + goto ice_aq_alloc_free_vsi_list_exit; + } + + if (opc == ice_aqc_opc_free_res) + sw_buf->elem[0].e.sw_resp = cpu_to_le16(*vsi_list_id); + + status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, opc, NULL); + if (status) + goto ice_aq_alloc_free_vsi_list_exit; + + if (opc == ice_aqc_opc_alloc_res) { + vsi_ele = &sw_buf->elem[0]; + *vsi_list_id = le16_to_cpu(vsi_ele->e.sw_resp); + } + +ice_aq_alloc_free_vsi_list_exit: + devm_kfree(ice_hw_to_dev(hw), sw_buf); + return status; +} + +/** + * ice_aq_sw_rules - add/update/remove switch rules + * @hw: pointer to the hw struct + * @rule_list: pointer to switch rule population list + * @rule_list_sz: total size of the rule list in bytes + * @num_rules: number of switch rules in the rule_list + * @opc: switch rules population command type - pass in the command opcode + * @cd: pointer to command details structure or NULL + * + * Add(0x02a0)/Update(0x02a1)/Remove(0x02a2) switch rules commands to firmware + */ +static enum ice_status +ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz, + u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc; + + if (opc != ice_aqc_opc_add_sw_rules && + opc != ice_aqc_opc_update_sw_rules && + opc != ice_aqc_opc_remove_sw_rules) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, opc); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + desc.params.sw_rules.num_rules_fltr_entry_index = + cpu_to_le16(num_rules); + return ice_aq_send_cmd(hw, 
&desc, rule_list, rule_list_sz, cd); +} + +/* ice_init_port_info - Initialize port_info with switch configuration data + * @pi: pointer to port_info + * @vsi_port_num: VSI number or port number + * @type: Type of switch element (port or VSI) + * @swid: switch ID of the switch the element is attached to + * @pf_vf_num: PF or VF number + * @is_vf: true if the element is a VF, false otherwise + */ +static void +ice_init_port_info(struct ice_port_info *pi, u16 vsi_port_num, u8 type, + u16 swid, u16 pf_vf_num, bool is_vf) +{ + switch (type) { + case ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT: + pi->lport = (u8)(vsi_port_num & ICE_LPORT_MASK); + pi->sw_id = swid; + pi->pf_vf_num = pf_vf_num; + pi->is_vf = is_vf; + pi->dflt_tx_vsi_num = ICE_DFLT_VSI_INVAL; + pi->dflt_rx_vsi_num = ICE_DFLT_VSI_INVAL; + break; + default: + ice_debug(pi->hw, ICE_DBG_SW, + "incorrect VSI/port type received\n"); + break; + } +} + +/* ice_get_initial_sw_cfg - Get initial port and default VSI data + * @hw: pointer to the hardware structure + */ +enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw) +{ + struct ice_aqc_get_sw_cfg_resp *rbuf; + enum ice_status status; + u16 req_desc = 0; + u16 num_elems; + u16 i; + + rbuf = devm_kzalloc(ice_hw_to_dev(hw), ICE_SW_CFG_MAX_BUF_LEN, + GFP_KERNEL); + + if (!rbuf) + return ICE_ERR_NO_MEMORY; + + /* Multiple calls to ice_aq_get_sw_cfg may be required + * to get all the switch configuration information. The need + * for additional calls is indicated by ice_aq_get_sw_cfg + * writing a non-zero value in req_desc + */ + do { + status = ice_aq_get_sw_cfg(hw, rbuf, ICE_SW_CFG_MAX_BUF_LEN, + &req_desc, &num_elems, NULL); + + if (status) + break; + + for (i = 0; i < num_elems; i++) { + struct ice_aqc_get_sw_cfg_resp_elem *ele; + u16 pf_vf_num, swid, vsi_port_num; + bool is_vf = false; + u8 type; + + ele = rbuf[i].elements; + vsi_port_num = le16_to_cpu(ele->vsi_port_num) & + ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M; + + pf_vf_num = le16_to_cpu(ele->pf_vf_num) & + ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M; + + swid = le16_to_cpu(ele->swid); + + if (le16_to_cpu(ele->pf_vf_num) & + ICE_AQC_GET_SW_CONF_RESP_IS_VF) + is_vf = true; + + type = le16_to_cpu(ele->vsi_port_num) >> + ICE_AQC_GET_SW_CONF_RESP_TYPE_S; + + if (type == ICE_AQC_GET_SW_CONF_RESP_VSI) { + /* FW VSI is not needed. Just continue. */ + continue; + } + + ice_init_port_info(hw->port_info, vsi_port_num, + type, swid, pf_vf_num, is_vf); + } + } while (req_desc && !status); + + devm_kfree(ice_hw_to_dev(hw), (void *)rbuf); + return status; +} + +/** + * ice_fill_sw_info - Helper function to populate lb_en and lan_en + * @hw: pointer to the hardware structure + * @f_info: filter info structure to fill/update + * + * This helper function populates the lb_en and lan_en elements of the provided + * ice_fltr_info struct using the switch's type and characteristics of the + * switch rule being configured. 
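+ *
+ * As a minimal sketch (illustrative only; "fi" and "addr" are placeholders,
+ * and hw is assumed to describe a VEB, i.e. hw->evb_veb is set):
+ *
+ *	struct ice_fltr_info fi = { 0 };
+ *
+ *	fi.flag = ICE_FLTR_TX;
+ *	fi.fltr_act = ICE_FWD_TO_VSI;
+ *	fi.lkup_type = ICE_SW_LKUP_MAC;
+ *	ether_addr_copy(fi.l_data.mac.mac_addr, addr);
+ *	ice_fill_sw_info(hw, &fi);
+ *
+ * leaves fi.lb_en true and fi.lan_en false for a unicast addr, so the frame
+ * is only looped back within the switch; when hw->evb_veb is not set, lan_en
+ * is set to true as well and the frame may also go out to the uplink.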
+ */ +static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *f_info) +{ + f_info->lb_en = false; + f_info->lan_en = false; + if ((f_info->flag & ICE_FLTR_TX) && + (f_info->fltr_act == ICE_FWD_TO_VSI || + f_info->fltr_act == ICE_FWD_TO_VSI_LIST || + f_info->fltr_act == ICE_FWD_TO_Q || + f_info->fltr_act == ICE_FWD_TO_QGRP)) { + f_info->lb_en = true; + if (!(hw->evb_veb && f_info->lkup_type == ICE_SW_LKUP_MAC && + is_unicast_ether_addr(f_info->l_data.mac.mac_addr))) + f_info->lan_en = true; + } +} + +/** + * ice_fill_sw_rule - Helper function to fill switch rule structure + * @hw: pointer to the hardware structure + * @f_info: entry containing packet forwarding information + * @s_rule: switch rule structure to be filled in based on mac_entry + * @opc: switch rules population command type - pass in the command opcode + */ +static void +ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, + struct ice_aqc_sw_rules_elem *s_rule, enum ice_adminq_opc opc) +{ + u16 vlan_id = ICE_MAX_VLAN_ID + 1; + u8 eth_hdr[DUMMY_ETH_HDR_LEN]; + void *daddr = NULL; + u32 act = 0; + __be16 *off; + u8 q_rgn; + + if (opc == ice_aqc_opc_remove_sw_rules) { + s_rule->pdata.lkup_tx_rx.act = 0; + s_rule->pdata.lkup_tx_rx.index = + cpu_to_le16(f_info->fltr_rule_id); + s_rule->pdata.lkup_tx_rx.hdr_len = 0; + return; + } + + /* initialize the ether header with a dummy header */ + memcpy(eth_hdr, dummy_eth_header, sizeof(dummy_eth_header)); + ice_fill_sw_info(hw, f_info); + + switch (f_info->fltr_act) { + case ICE_FWD_TO_VSI: + act |= (f_info->fwd_id.vsi_id << ICE_SINGLE_ACT_VSI_ID_S) & + ICE_SINGLE_ACT_VSI_ID_M; + if (f_info->lkup_type != ICE_SW_LKUP_VLAN) + act |= ICE_SINGLE_ACT_VSI_FORWARDING | + ICE_SINGLE_ACT_VALID_BIT; + break; + case ICE_FWD_TO_VSI_LIST: + act |= ICE_SINGLE_ACT_VSI_LIST; + act |= (f_info->fwd_id.vsi_list_id << + ICE_SINGLE_ACT_VSI_LIST_ID_S) & + ICE_SINGLE_ACT_VSI_LIST_ID_M; + if (f_info->lkup_type != ICE_SW_LKUP_VLAN) + act |= ICE_SINGLE_ACT_VSI_FORWARDING | + ICE_SINGLE_ACT_VALID_BIT; + break; + case ICE_FWD_TO_Q: + act |= ICE_SINGLE_ACT_TO_Q; + act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & + ICE_SINGLE_ACT_Q_INDEX_M; + break; + case ICE_DROP_PACKET: + act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP | + ICE_SINGLE_ACT_VALID_BIT; + break; + case ICE_FWD_TO_QGRP: + q_rgn = f_info->qgrp_size > 0 ? 
+ (u8)ilog2(f_info->qgrp_size) : 0;
+ act |= ICE_SINGLE_ACT_TO_Q;
+ act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+ ICE_SINGLE_ACT_Q_INDEX_M;
+ act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) &
+ ICE_SINGLE_ACT_Q_REGION_M;
+ break;
+ default:
+ return;
+ }
+
+ if (f_info->lb_en)
+ act |= ICE_SINGLE_ACT_LB_ENABLE;
+ if (f_info->lan_en)
+ act |= ICE_SINGLE_ACT_LAN_ENABLE;
+
+ switch (f_info->lkup_type) {
+ case ICE_SW_LKUP_MAC:
+ daddr = f_info->l_data.mac.mac_addr;
+ break;
+ case ICE_SW_LKUP_VLAN:
+ vlan_id = f_info->l_data.vlan.vlan_id;
+ if (f_info->fltr_act == ICE_FWD_TO_VSI ||
+ f_info->fltr_act == ICE_FWD_TO_VSI_LIST) {
+ act |= ICE_SINGLE_ACT_PRUNE;
+ act |= ICE_SINGLE_ACT_EGRESS | ICE_SINGLE_ACT_INGRESS;
+ }
+ break;
+ case ICE_SW_LKUP_ETHERTYPE_MAC:
+ daddr = f_info->l_data.ethertype_mac.mac_addr;
+ /* fall-through */
+ case ICE_SW_LKUP_ETHERTYPE:
+ off = (__be16 *)&eth_hdr[ICE_ETH_ETHTYPE_OFFSET];
+ *off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype);
+ break;
+ case ICE_SW_LKUP_MAC_VLAN:
+ daddr = f_info->l_data.mac_vlan.mac_addr;
+ vlan_id = f_info->l_data.mac_vlan.vlan_id;
+ break;
+ case ICE_SW_LKUP_PROMISC_VLAN:
+ vlan_id = f_info->l_data.mac_vlan.vlan_id;
+ /* fall-through */
+ case ICE_SW_LKUP_PROMISC:
+ daddr = f_info->l_data.mac_vlan.mac_addr;
+ break;
+ default:
+ break;
+ }
+
+ s_rule->type = (f_info->flag & ICE_FLTR_RX) ?
+ cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX) :
+ cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
+
+ /* Recipe set depending on lookup type */
+ s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(f_info->lkup_type);
+ s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(f_info->src);
+ s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act);
+
+ if (daddr)
+ ether_addr_copy(&eth_hdr[ICE_ETH_DA_OFFSET], daddr);
+
+ if (!(vlan_id > ICE_MAX_VLAN_ID)) {
+ off = (__be16 *)&eth_hdr[ICE_ETH_VLAN_TCI_OFFSET];
+ *off = cpu_to_be16(vlan_id);
+ }
+
+ /* Create the switch rule with the final dummy Ethernet header */
+ if (opc != ice_aqc_opc_update_sw_rules)
+ s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(sizeof(eth_hdr));
+
+ memcpy(s_rule->pdata.lkup_tx_rx.hdr, eth_hdr, sizeof(eth_hdr));
+}
+
+/**
+ * ice_add_marker_act
+ * @hw: pointer to the hardware structure
+ * @m_ent: the management entry for which sw marker needs to be added
+ * @sw_marker: sw marker to tag the Rx descriptor with
+ * @l_id: large action resource id
+ *
+ * Create a large action to hold software marker and update the switch rule
+ * entry pointed by m_ent with newly created large action
+ */
+static enum ice_status
+ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent,
+ u16 sw_marker, u16 l_id)
+{
+ struct ice_aqc_sw_rules_elem *lg_act, *rx_tx;
+ /* For software marker we need 3 large actions
+ * 1. FWD action: FWD TO VSI or VSI LIST
+ * 2. GENERIC VALUE action to hold the profile id
+ * 3. GENERIC VALUE action to hold the software marker id
+ */
+ const u16 num_lg_acts = 3;
+ enum ice_status status;
+ u16 lg_act_size;
+ u16 rules_size;
+ u16 vsi_info;
+ u32 act;
+
+ if (m_ent->fltr_info.lkup_type != ICE_SW_LKUP_MAC)
+ return ICE_ERR_PARAM;
+
+ /* Create two back-to-back switch rules and submit them to the HW using
+ * one memory buffer:
+ * 1. Large Action
+ * 2.
Look up tx rx + */ + lg_act_size = (u16)ICE_SW_RULE_LG_ACT_SIZE(num_lg_acts); + rules_size = lg_act_size + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE; + lg_act = devm_kzalloc(ice_hw_to_dev(hw), rules_size, GFP_KERNEL); + if (!lg_act) + return ICE_ERR_NO_MEMORY; + + rx_tx = (struct ice_aqc_sw_rules_elem *)((u8 *)lg_act + lg_act_size); + + /* Fill in the first switch rule i.e. large action */ + lg_act->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LG_ACT); + lg_act->pdata.lg_act.index = cpu_to_le16(l_id); + lg_act->pdata.lg_act.size = cpu_to_le16(num_lg_acts); + + /* First action VSI forwarding or VSI list forwarding depending on how + * many VSIs + */ + vsi_info = (m_ent->vsi_count > 1) ? + m_ent->fltr_info.fwd_id.vsi_list_id : + m_ent->fltr_info.fwd_id.vsi_id; + + act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT; + act |= (vsi_info << ICE_LG_ACT_VSI_LIST_ID_S) & + ICE_LG_ACT_VSI_LIST_ID_M; + if (m_ent->vsi_count > 1) + act |= ICE_LG_ACT_VSI_LIST; + lg_act->pdata.lg_act.act[0] = cpu_to_le32(act); + + /* Second action descriptor type */ + act = ICE_LG_ACT_GENERIC; + + act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; + lg_act->pdata.lg_act.act[1] = cpu_to_le32(act); + + act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX << + ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M; + + /* Third action Marker value */ + act |= ICE_LG_ACT_GENERIC; + act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) & + ICE_LG_ACT_GENERIC_VALUE_M; + + lg_act->pdata.lg_act.act[2] = cpu_to_le32(act); + + /* call the fill switch rule to fill the lookup tx rx structure */ + ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx, + ice_aqc_opc_update_sw_rules); + + /* Update the action to point to the large action id */ + rx_tx->pdata.lkup_tx_rx.act = + cpu_to_le32(ICE_SINGLE_ACT_PTR | + ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) & + ICE_SINGLE_ACT_PTR_VAL_M)); + + /* Use the filter rule id of the previously created rule with single + * act. 
Once the update happens, hardware will treat this as large + * action + */ + rx_tx->pdata.lkup_tx_rx.index = + cpu_to_le16(m_ent->fltr_info.fltr_rule_id); + + status = ice_aq_sw_rules(hw, lg_act, rules_size, 2, + ice_aqc_opc_update_sw_rules, NULL); + if (!status) { + m_ent->lg_act_idx = l_id; + m_ent->sw_marker_id = sw_marker; + } + + devm_kfree(ice_hw_to_dev(hw), lg_act); + return status; +} + +/** + * ice_create_vsi_list_map + * @hw: pointer to the hardware structure + * @vsi_array: array of VSIs to form a VSI list + * @num_vsi: num VSI in the array + * @vsi_list_id: VSI list id generated as part of allocate resource + * + * Helper function to create a new entry of VSI list id to VSI mapping + * using the given VSI list id + */ +static struct ice_vsi_list_map_info * +ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi, + u16 vsi_list_id) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_vsi_list_map_info *v_map; + int i; + + v_map = devm_kcalloc(ice_hw_to_dev(hw), 1, sizeof(*v_map), GFP_KERNEL); + if (!v_map) + return NULL; + + v_map->vsi_list_id = vsi_list_id; + + for (i = 0; i < num_vsi; i++) + set_bit(vsi_array[i], v_map->vsi_map); + + list_add(&v_map->list_entry, &sw->vsi_list_map_head); + return v_map; +} + +/** + * ice_update_vsi_list_rule + * @hw: pointer to the hardware structure + * @vsi_array: array of VSIs to form a VSI list + * @num_vsi: num VSI in the array + * @vsi_list_id: VSI list id generated as part of allocate resource + * @remove: Boolean value to indicate if this is a remove action + * @opc: switch rules population command type - pass in the command opcode + * @lkup_type: lookup type of the filter + * + * Call AQ command to add a new switch rule or update existing switch rule + * using the given VSI list id + */ +static enum ice_status +ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi, + u16 vsi_list_id, bool remove, enum ice_adminq_opc opc, + enum ice_sw_lkup_type lkup_type) +{ + struct ice_aqc_sw_rules_elem *s_rule; + enum ice_status status; + u16 s_rule_size; + u16 type; + int i; + + if (!num_vsi) + return ICE_ERR_PARAM; + + if (lkup_type == ICE_SW_LKUP_MAC || + lkup_type == ICE_SW_LKUP_MAC_VLAN || + lkup_type == ICE_SW_LKUP_ETHERTYPE || + lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || + lkup_type == ICE_SW_LKUP_PROMISC || + lkup_type == ICE_SW_LKUP_PROMISC_VLAN) + type = remove ? ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR : + ICE_AQC_SW_RULES_T_VSI_LIST_SET; + else if (lkup_type == ICE_SW_LKUP_VLAN) + type = remove ? 
ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR : + ICE_AQC_SW_RULES_T_PRUNE_LIST_SET; + else + return ICE_ERR_PARAM; + + s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(num_vsi); + s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + for (i = 0; i < num_vsi; i++) + s_rule->pdata.vsi_list.vsi[i] = cpu_to_le16(vsi_array[i]); + + s_rule->type = cpu_to_le16(type); + s_rule->pdata.vsi_list.number_vsi = cpu_to_le16(num_vsi); + s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id); + + status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opc, NULL); + + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_create_vsi_list_rule - Creates and populates a VSI list rule + * @hw: pointer to the hw struct + * @vsi_array: array of VSIs to form a VSI list + * @num_vsi: number of VSIs in the array + * @vsi_list_id: stores the ID of the VSI list to be created + * @lkup_type: switch rule filter's lookup type + */ +static enum ice_status +ice_create_vsi_list_rule(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi, + u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type) +{ + enum ice_status status; + int i; + + for (i = 0; i < num_vsi; i++) + if (vsi_array[i] >= ICE_MAX_VSI) + return ICE_ERR_OUT_OF_RANGE; + + status = ice_aq_alloc_free_vsi_list(hw, vsi_list_id, lkup_type, + ice_aqc_opc_alloc_res); + if (status) + return status; + + /* Update the newly created VSI list to include the specified VSIs */ + return ice_update_vsi_list_rule(hw, vsi_array, num_vsi, *vsi_list_id, + false, ice_aqc_opc_add_sw_rules, + lkup_type); +} + +/** + * ice_create_pkt_fwd_rule + * @hw: pointer to the hardware structure + * @f_entry: entry containing packet forwarding information + * + * Create switch rule with given filter information and add an entry + * to the corresponding filter management list to track this switch rule + * and VSI mapping + */ +static enum ice_status +ice_create_pkt_fwd_rule(struct ice_hw *hw, + struct ice_fltr_list_entry *f_entry) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_mgmt_list_entry *fm_entry; + struct ice_aqc_sw_rules_elem *s_rule; + enum ice_sw_lkup_type l_type; + enum ice_status status; + + s_rule = devm_kzalloc(ice_hw_to_dev(hw), + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + fm_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*fm_entry), + GFP_KERNEL); + if (!fm_entry) { + status = ICE_ERR_NO_MEMORY; + goto ice_create_pkt_fwd_rule_exit; + } + + fm_entry->fltr_info = f_entry->fltr_info; + + /* Initialize all the fields for the management entry */ + fm_entry->vsi_count = 1; + fm_entry->lg_act_idx = ICE_INVAL_LG_ACT_INDEX; + fm_entry->sw_marker_id = ICE_INVAL_SW_MARKER_ID; + fm_entry->counter_index = ICE_INVAL_COUNTER_ID; + + ice_fill_sw_rule(hw, &fm_entry->fltr_info, s_rule, + ice_aqc_opc_add_sw_rules); + + status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1, + ice_aqc_opc_add_sw_rules, NULL); + if (status) { + devm_kfree(ice_hw_to_dev(hw), fm_entry); + goto ice_create_pkt_fwd_rule_exit; + } + + f_entry->fltr_info.fltr_rule_id = + le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + fm_entry->fltr_info.fltr_rule_id = + le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + + /* The book keeping entries will get removed when base driver + * calls remove filter AQ command + */ + l_type = fm_entry->fltr_info.lkup_type; + if (l_type == ICE_SW_LKUP_MAC) { + mutex_lock(&sw->mac_list_lock); + list_add(&fm_entry->list_entry, &sw->mac_list_head); + 
mutex_unlock(&sw->mac_list_lock); + } else if (l_type == ICE_SW_LKUP_VLAN) { + mutex_lock(&sw->vlan_list_lock); + list_add(&fm_entry->list_entry, &sw->vlan_list_head); + mutex_unlock(&sw->vlan_list_lock); + } else if (l_type == ICE_SW_LKUP_ETHERTYPE || + l_type == ICE_SW_LKUP_ETHERTYPE_MAC) { + mutex_lock(&sw->eth_m_list_lock); + list_add(&fm_entry->list_entry, &sw->eth_m_list_head); + mutex_unlock(&sw->eth_m_list_lock); + } else if (l_type == ICE_SW_LKUP_PROMISC || + l_type == ICE_SW_LKUP_PROMISC_VLAN) { + mutex_lock(&sw->promisc_list_lock); + list_add(&fm_entry->list_entry, &sw->promisc_list_head); + mutex_unlock(&sw->promisc_list_lock); + } else if (fm_entry->fltr_info.lkup_type == ICE_SW_LKUP_MAC_VLAN) { + mutex_lock(&sw->mac_vlan_list_lock); + list_add(&fm_entry->list_entry, &sw->mac_vlan_list_head); + mutex_unlock(&sw->mac_vlan_list_lock); + } else { + status = ICE_ERR_NOT_IMPL; + } +ice_create_pkt_fwd_rule_exit: + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_update_pkt_fwd_rule + * @hw: pointer to the hardware structure + * @rule_id: rule of previously created switch rule to update + * @vsi_list_id: VSI list id to be updated with + * @f_info: ice_fltr_info to pull other information for switch rule + * + * Call AQ command to update a previously created switch rule with a + * VSI list id + */ +static enum ice_status +ice_update_pkt_fwd_rule(struct ice_hw *hw, u16 rule_id, u16 vsi_list_id, + struct ice_fltr_info f_info) +{ + struct ice_aqc_sw_rules_elem *s_rule; + struct ice_fltr_info tmp_fltr; + enum ice_status status; + + s_rule = devm_kzalloc(ice_hw_to_dev(hw), + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + tmp_fltr = f_info; + tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST; + tmp_fltr.fwd_id.vsi_list_id = vsi_list_id; + + ice_fill_sw_rule(hw, &tmp_fltr, s_rule, + ice_aqc_opc_update_sw_rules); + + s_rule->pdata.lkup_tx_rx.index = cpu_to_le16(rule_id); + + /* Update switch rule with new rule set to forward VSI list */ + status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1, + ice_aqc_opc_update_sw_rules, NULL); + + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_handle_vsi_list_mgmt + * @hw: pointer to the hardware structure + * @m_entry: pointer to current filter management list entry + * @cur_fltr: filter information from the book keeping entry + * @new_fltr: filter information with the new VSI to be added + * + * Call AQ command to add or update previously created VSI list with new VSI. + * + * Helper function to do book keeping associated with adding filter information + * The algorithm to do the booking keeping is described below : + * When a VSI needs to subscribe to a given filter( MAC/VLAN/Ethtype etc.) 
+ * if only one VSI has been added till now + * Allocate a new VSI list and add two VSIs + * to this list using switch rule command + * Update the previously created switch rule with the + * newly created VSI list id + * if a VSI list was previously created + * Add the new VSI to the previously created VSI list set + * using the update switch rule command + */ +static enum ice_status +ice_handle_vsi_list_mgmt(struct ice_hw *hw, + struct ice_fltr_mgmt_list_entry *m_entry, + struct ice_fltr_info *cur_fltr, + struct ice_fltr_info *new_fltr) +{ + enum ice_status status = 0; + u16 vsi_list_id = 0; + + if ((cur_fltr->fltr_act == ICE_FWD_TO_Q || + cur_fltr->fltr_act == ICE_FWD_TO_QGRP)) + return ICE_ERR_NOT_IMPL; + + if ((new_fltr->fltr_act == ICE_FWD_TO_Q || + new_fltr->fltr_act == ICE_FWD_TO_QGRP) && + (cur_fltr->fltr_act == ICE_FWD_TO_VSI || + cur_fltr->fltr_act == ICE_FWD_TO_VSI_LIST)) + return ICE_ERR_NOT_IMPL; + + if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) { + /* Only one entry existed in the mapping and it was not already + * a part of a VSI list. So, create a VSI list with the old and + * new VSIs. + */ + u16 vsi_id_arr[2]; + u16 fltr_rule; + + /* A rule already exists with the new VSI being added */ + if (cur_fltr->fwd_id.vsi_id == new_fltr->fwd_id.vsi_id) + return ICE_ERR_ALREADY_EXISTS; + + vsi_id_arr[0] = cur_fltr->fwd_id.vsi_id; + vsi_id_arr[1] = new_fltr->fwd_id.vsi_id; + status = ice_create_vsi_list_rule(hw, &vsi_id_arr[0], 2, + &vsi_list_id, + new_fltr->lkup_type); + if (status) + return status; + + fltr_rule = cur_fltr->fltr_rule_id; + /* Update the previous switch rule of "MAC forward to VSI" to + * "MAC fwd to VSI list" + */ + status = ice_update_pkt_fwd_rule(hw, fltr_rule, vsi_list_id, + *new_fltr); + if (status) + return status; + + cur_fltr->fwd_id.vsi_list_id = vsi_list_id; + cur_fltr->fltr_act = ICE_FWD_TO_VSI_LIST; + m_entry->vsi_list_info = + ice_create_vsi_list_map(hw, &vsi_id_arr[0], 2, + vsi_list_id); + + /* If this entry was large action then the large action needs + * to be updated to point to FWD to VSI list + */ + if (m_entry->sw_marker_id != ICE_INVAL_SW_MARKER_ID) + status = + ice_add_marker_act(hw, m_entry, + m_entry->sw_marker_id, + m_entry->lg_act_idx); + } else { + u16 vsi_id = new_fltr->fwd_id.vsi_id; + enum ice_adminq_opc opcode; + + if (!m_entry->vsi_list_info) + return ICE_ERR_CFG; + + /* A rule already exists with the new VSI being added */ + if (test_bit(vsi_id, m_entry->vsi_list_info->vsi_map)) + return 0; + + /* Update the previously created VSI list set with + * the new VSI id passed in + */ + vsi_list_id = cur_fltr->fwd_id.vsi_list_id; + opcode = ice_aqc_opc_update_sw_rules; + + status = ice_update_vsi_list_rule(hw, &vsi_id, 1, vsi_list_id, + false, opcode, + new_fltr->lkup_type); + /* update VSI list mapping info with new VSI id */ + if (!status) + set_bit(vsi_id, m_entry->vsi_list_info->vsi_map); + } + if (!status) + m_entry->vsi_count++; + return status; +} + +/** + * ice_find_mac_entry + * @hw: pointer to the hardware structure + * @mac_addr: MAC address to search for + * + * Helper function to search for a MAC entry using a given MAC address + * Returns pointer to the entry if found. 
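+ * Returns NULL if no matching entry exists. The list walk over mac_list_head
+ * is done under mac_list_lock, which this function acquires and releases
+ * itself.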
+ */ +static struct ice_fltr_mgmt_list_entry * +ice_find_mac_entry(struct ice_hw *hw, u8 *mac_addr) +{ + struct ice_fltr_mgmt_list_entry *m_list_itr, *mac_ret = NULL; + struct ice_switch_info *sw = hw->switch_info; + + mutex_lock(&sw->mac_list_lock); + list_for_each_entry(m_list_itr, &sw->mac_list_head, list_entry) { + u8 *buf = &m_list_itr->fltr_info.l_data.mac.mac_addr[0]; + + if (ether_addr_equal(buf, mac_addr)) { + mac_ret = m_list_itr; + break; + } + } + mutex_unlock(&sw->mac_list_lock); + return mac_ret; +} + +/** + * ice_add_shared_mac - Add one MAC shared filter rule + * @hw: pointer to the hardware structure + * @f_entry: structure containing MAC forwarding information + * + * Adds or updates the book keeping list for the MAC addresses + */ +static enum ice_status +ice_add_shared_mac(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry) +{ + struct ice_fltr_info *new_fltr, *cur_fltr; + struct ice_fltr_mgmt_list_entry *m_entry; + + new_fltr = &f_entry->fltr_info; + + m_entry = ice_find_mac_entry(hw, &new_fltr->l_data.mac.mac_addr[0]); + if (!m_entry) + return ice_create_pkt_fwd_rule(hw, f_entry); + + cur_fltr = &m_entry->fltr_info; + + return ice_handle_vsi_list_mgmt(hw, m_entry, cur_fltr, new_fltr); +} + +/** + * ice_add_mac - Add a MAC address based filter rule + * @hw: pointer to the hardware structure + * @m_list: list of MAC addresses and forwarding information + * + * IMPORTANT: When the ucast_shared flag is set to false and m_list has + * multiple unicast addresses, the function assumes that all the + * addresses are unique in a given add_mac call. It doesn't + * check for duplicates in this case, removing duplicates from a given + * list should be taken care of in the caller of this function. + */ +enum ice_status +ice_add_mac(struct ice_hw *hw, struct list_head *m_list) +{ + struct ice_aqc_sw_rules_elem *s_rule, *r_iter; + struct ice_fltr_list_entry *m_list_itr; + u16 elem_sent, total_elem_left; + enum ice_status status = 0; + u16 num_unicast = 0; + u16 s_rule_size; + + if (!m_list || !hw) + return ICE_ERR_PARAM; + + list_for_each_entry(m_list_itr, m_list, list_entry) { + u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0]; + + if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC) + return ICE_ERR_PARAM; + if (is_zero_ether_addr(add)) + return ICE_ERR_PARAM; + if (is_unicast_ether_addr(add) && !hw->ucast_shared) { + /* Don't overwrite the unicast address */ + if (ice_find_mac_entry(hw, add)) + return ICE_ERR_ALREADY_EXISTS; + num_unicast++; + } else if (is_multicast_ether_addr(add) || + (is_unicast_ether_addr(add) && hw->ucast_shared)) { + status = ice_add_shared_mac(hw, m_list_itr); + if (status) { + m_list_itr->status = ICE_FLTR_STATUS_FW_FAIL; + return status; + } + m_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS; + } + } + + /* Exit if no suitable entries were found for adding bulk switch rule */ + if (!num_unicast) + return 0; + + /* Allocate switch rule buffer for the bulk update for unicast */ + s_rule_size = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE; + s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size, + GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + r_iter = s_rule; + list_for_each_entry(m_list_itr, m_list, list_entry) { + struct ice_fltr_info *f_info = &m_list_itr->fltr_info; + u8 *addr = &f_info->l_data.mac.mac_addr[0]; + + if (is_unicast_ether_addr(addr)) { + ice_fill_sw_rule(hw, &m_list_itr->fltr_info, + r_iter, ice_aqc_opc_add_sw_rules); + r_iter = (struct ice_aqc_sw_rules_elem *) + ((u8 *)r_iter + s_rule_size); + } + } + + /* Call 
AQ bulk switch rule update for all unicast addresses */ + r_iter = s_rule; + /* Call AQ switch rule in AQ_MAX chunk */ + for (total_elem_left = num_unicast; total_elem_left > 0; + total_elem_left -= elem_sent) { + struct ice_aqc_sw_rules_elem *entry = r_iter; + + elem_sent = min(total_elem_left, + (u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size)); + status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size, + elem_sent, ice_aqc_opc_add_sw_rules, + NULL); + if (status) + goto ice_add_mac_exit; + r_iter = (struct ice_aqc_sw_rules_elem *) + ((u8 *)r_iter + (elem_sent * s_rule_size)); + } + + /* Fill up rule id based on the value returned from FW */ + r_iter = s_rule; + list_for_each_entry(m_list_itr, m_list, list_entry) { + struct ice_fltr_info *f_info = &m_list_itr->fltr_info; + u8 *addr = &f_info->l_data.mac.mac_addr[0]; + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_mgmt_list_entry *fm_entry; + + if (is_unicast_ether_addr(addr)) { + f_info->fltr_rule_id = + le16_to_cpu(r_iter->pdata.lkup_tx_rx.index); + f_info->fltr_act = ICE_FWD_TO_VSI; + /* Create an entry to track this MAC address */ + fm_entry = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*fm_entry), GFP_KERNEL); + if (!fm_entry) { + status = ICE_ERR_NO_MEMORY; + goto ice_add_mac_exit; + } + fm_entry->fltr_info = *f_info; + fm_entry->vsi_count = 1; + /* The book keeping entries will get removed when + * base driver calls remove filter AQ command + */ + mutex_lock(&sw->mac_list_lock); + list_add(&fm_entry->list_entry, &sw->mac_list_head); + mutex_unlock(&sw->mac_list_lock); + + r_iter = (struct ice_aqc_sw_rules_elem *) + ((u8 *)r_iter + s_rule_size); + } + } + +ice_add_mac_exit: + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_find_vlan_entry + * @hw: pointer to the hardware structure + * @vlan_id: VLAN id to search for + * + * Helper function to search for a VLAN entry using a given VLAN id + * Returns pointer to the entry if found. + */ +static struct ice_fltr_mgmt_list_entry * +ice_find_vlan_entry(struct ice_hw *hw, u16 vlan_id) +{ + struct ice_fltr_mgmt_list_entry *vlan_list_itr, *vlan_ret = NULL; + struct ice_switch_info *sw = hw->switch_info; + + mutex_lock(&sw->vlan_list_lock); + list_for_each_entry(vlan_list_itr, &sw->vlan_list_head, list_entry) + if (vlan_list_itr->fltr_info.l_data.vlan.vlan_id == vlan_id) { + vlan_ret = vlan_list_itr; + break; + } + + mutex_unlock(&sw->vlan_list_lock); + return vlan_ret; +} + +/** + * ice_add_vlan_internal - Add one VLAN based filter rule + * @hw: pointer to the hardware structure + * @f_entry: filter entry containing one VLAN information + */ +static enum ice_status +ice_add_vlan_internal(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry) +{ + struct ice_fltr_info *new_fltr, *cur_fltr; + struct ice_fltr_mgmt_list_entry *v_list_itr; + u16 vlan_id; + + new_fltr = &f_entry->fltr_info; + /* VLAN id should only be 12 bits */ + if (new_fltr->l_data.vlan.vlan_id > ICE_MAX_VLAN_ID) + return ICE_ERR_PARAM; + + vlan_id = new_fltr->l_data.vlan.vlan_id; + v_list_itr = ice_find_vlan_entry(hw, vlan_id); + if (!v_list_itr) { + u16 vsi_id = ICE_VSI_INVAL_ID; + enum ice_status status; + u16 vsi_list_id = 0; + + if (new_fltr->fltr_act == ICE_FWD_TO_VSI) { + enum ice_sw_lkup_type lkup_type = new_fltr->lkup_type; + + /* All VLAN pruning rules use a VSI list. + * Convert the action to forwarding to a VSI list. 
+ */ + vsi_id = new_fltr->fwd_id.vsi_id; + status = ice_create_vsi_list_rule(hw, &vsi_id, 1, + &vsi_list_id, + lkup_type); + if (status) + return status; + new_fltr->fltr_act = ICE_FWD_TO_VSI_LIST; + new_fltr->fwd_id.vsi_list_id = vsi_list_id; + } + + status = ice_create_pkt_fwd_rule(hw, f_entry); + if (!status && vsi_id != ICE_VSI_INVAL_ID) { + v_list_itr = ice_find_vlan_entry(hw, vlan_id); + if (!v_list_itr) + return ICE_ERR_DOES_NOT_EXIST; + v_list_itr->vsi_list_info = + ice_create_vsi_list_map(hw, &vsi_id, 1, + vsi_list_id); + } + + return status; + } + + cur_fltr = &v_list_itr->fltr_info; + return ice_handle_vsi_list_mgmt(hw, v_list_itr, cur_fltr, new_fltr); +} + +/** + * ice_add_vlan - Add VLAN based filter rule + * @hw: pointer to the hardware structure + * @v_list: list of VLAN entries and forwarding information + */ +enum ice_status +ice_add_vlan(struct ice_hw *hw, struct list_head *v_list) +{ + struct ice_fltr_list_entry *v_list_itr; + + if (!v_list || !hw) + return ICE_ERR_PARAM; + + list_for_each_entry(v_list_itr, v_list, list_entry) { + enum ice_status status; + + if (v_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_VLAN) + return ICE_ERR_PARAM; + + status = ice_add_vlan_internal(hw, v_list_itr); + if (status) { + v_list_itr->status = ICE_FLTR_STATUS_FW_FAIL; + return status; + } + v_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS; + } + return 0; +} + +/** + * ice_remove_vsi_list_rule + * @hw: pointer to the hardware structure + * @vsi_list_id: VSI list id generated as part of allocate resource + * @lkup_type: switch rule filter lookup type + */ +static enum ice_status +ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id, + enum ice_sw_lkup_type lkup_type) +{ + struct ice_aqc_sw_rules_elem *s_rule; + enum ice_status status; + u16 s_rule_size; + + s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(0); + s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR); + s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id); + /* FW expects number of VSIs in vsi_list resource to be 0 for clear + * command. Since memory is zero'ed out during initialization, it's not + * necessary to explicitly initialize the variable to 0. 
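+ * (s_rule was obtained with devm_kzalloc() above, so number_vsi is
+ * already 0 at this point)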
+ */ + + status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, + ice_aqc_opc_remove_sw_rules, NULL); + if (!status) + /* Free the vsi_list resource that we allocated */ + status = ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, lkup_type, + ice_aqc_opc_free_res); + + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_handle_rem_vsi_list_mgmt + * @hw: pointer to the hardware structure + * @vsi_id: ID of the VSI to remove + * @fm_list_itr: filter management entry for which the VSI list management + * needs to be done + */ +static enum ice_status +ice_handle_rem_vsi_list_mgmt(struct ice_hw *hw, u16 vsi_id, + struct ice_fltr_mgmt_list_entry *fm_list_itr) +{ + struct ice_switch_info *sw = hw->switch_info; + enum ice_status status = 0; + enum ice_sw_lkup_type lkup_type; + bool is_last_elem = true; + bool conv_list = false; + bool del_list = false; + u16 vsi_list_id; + + lkup_type = fm_list_itr->fltr_info.lkup_type; + vsi_list_id = fm_list_itr->fltr_info.fwd_id.vsi_list_id; + + if (fm_list_itr->vsi_count > 1) { + status = ice_update_vsi_list_rule(hw, &vsi_id, 1, vsi_list_id, + true, + ice_aqc_opc_update_sw_rules, + lkup_type); + if (status) + return status; + fm_list_itr->vsi_count--; + is_last_elem = false; + clear_bit(vsi_id, fm_list_itr->vsi_list_info->vsi_map); + } + + /* For non-VLAN rules that forward packets to a VSI list, convert them + * to forwarding packets to a VSI if there is only one VSI left in the + * list. Unused lists are then removed. + * VLAN rules need to use VSI lists even with only one VSI. + */ + if (fm_list_itr->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST) { + if (lkup_type == ICE_SW_LKUP_VLAN) { + del_list = is_last_elem; + } else if (fm_list_itr->vsi_count == 1) { + conv_list = true; + del_list = true; + } + } + + if (del_list) { + /* Remove the VSI list since it is no longer used */ + struct ice_vsi_list_map_info *vsi_list_info = + fm_list_itr->vsi_list_info; + + status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type); + if (status) + return status; + + if (conv_list) { + u16 rem_vsi_id; + + rem_vsi_id = find_first_bit(vsi_list_info->vsi_map, + ICE_MAX_VSI); + + /* Error out when the expected last element is not in + * the VSI list map + */ + if (rem_vsi_id == ICE_MAX_VSI) + return ICE_ERR_OUT_OF_RANGE; + + /* Change the list entry action from VSI_LIST to VSI */ + fm_list_itr->fltr_info.fltr_act = ICE_FWD_TO_VSI; + fm_list_itr->fltr_info.fwd_id.vsi_id = rem_vsi_id; + } + + list_del(&vsi_list_info->list_entry); + devm_kfree(ice_hw_to_dev(hw), vsi_list_info); + fm_list_itr->vsi_list_info = NULL; + } + + if (conv_list) { + /* Convert the rule's forward action to forwarding packets to + * a VSI + */ + struct ice_aqc_sw_rules_elem *s_rule; + + s_rule = devm_kzalloc(ice_hw_to_dev(hw), + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, + GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + ice_fill_sw_rule(hw, &fm_list_itr->fltr_info, s_rule, + ice_aqc_opc_update_sw_rules); + + s_rule->pdata.lkup_tx_rx.index = + cpu_to_le16(fm_list_itr->fltr_info.fltr_rule_id); + + status = ice_aq_sw_rules(hw, s_rule, + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1, + ice_aqc_opc_update_sw_rules, NULL); + devm_kfree(ice_hw_to_dev(hw), s_rule); + if (status) + return status; + } + + if (is_last_elem) { + /* Remove the lookup rule */ + struct ice_aqc_sw_rules_elem *s_rule; + + s_rule = devm_kzalloc(ice_hw_to_dev(hw), + ICE_SW_RULE_RX_TX_NO_HDR_SIZE, + GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + ice_fill_sw_rule(hw, &fm_list_itr->fltr_info, s_rule, + 
ice_aqc_opc_remove_sw_rules); + + status = ice_aq_sw_rules(hw, s_rule, + ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1, + ice_aqc_opc_remove_sw_rules, NULL); + if (status) + return status; + + /* Remove a book keeping entry from the MAC address list */ + mutex_lock(&sw->mac_list_lock); + list_del(&fm_list_itr->list_entry); + mutex_unlock(&sw->mac_list_lock); + devm_kfree(ice_hw_to_dev(hw), fm_list_itr); + devm_kfree(ice_hw_to_dev(hw), s_rule); + } + return status; +} + +/** + * ice_remove_mac_entry + * @hw: pointer to the hardware structure + * @f_entry: structure containing MAC forwarding information + */ +static enum ice_status +ice_remove_mac_entry(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry) +{ + struct ice_fltr_mgmt_list_entry *m_entry; + u16 vsi_id; + u8 *add; + + add = &f_entry->fltr_info.l_data.mac.mac_addr[0]; + + m_entry = ice_find_mac_entry(hw, add); + if (!m_entry) + return ICE_ERR_PARAM; + + vsi_id = f_entry->fltr_info.fwd_id.vsi_id; + return ice_handle_rem_vsi_list_mgmt(hw, vsi_id, m_entry); +} + +/** + * ice_remove_mac - remove a MAC address based filter rule + * @hw: pointer to the hardware structure + * @m_list: list of MAC addresses and forwarding information + * + * This function removes either a MAC filter rule or a specific VSI from a + * VSI list for a multicast MAC address. + * + * Returns ICE_ERR_DOES_NOT_EXIST if a given entry was not added by + * ice_add_mac. Caller should be aware that this call will only work if all + * the entries passed into m_list were added previously. It will not attempt to + * do a partial remove of entries that were found. + */ +enum ice_status +ice_remove_mac(struct ice_hw *hw, struct list_head *m_list) +{ + struct ice_aqc_sw_rules_elem *s_rule, *r_iter; + u8 s_rule_size = ICE_SW_RULE_RX_TX_NO_HDR_SIZE; + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_mgmt_list_entry *m_entry; + struct ice_fltr_list_entry *m_list_itr; + u16 elem_sent, total_elem_left; + enum ice_status status = 0; + u16 num_unicast = 0; + + if (!m_list) + return ICE_ERR_PARAM; + + list_for_each_entry(m_list_itr, m_list, list_entry) { + u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr; + + if (is_unicast_ether_addr(addr) && !hw->ucast_shared) + num_unicast++; + else if (is_multicast_ether_addr(addr) || + (is_unicast_ether_addr(addr) && hw->ucast_shared)) + ice_remove_mac_entry(hw, m_list_itr); + } + + /* Exit if no unicast addresses found. 
Multicast switch rules + * were added individually + */ + if (!num_unicast) + return 0; + + /* Allocate switch rule buffer for the bulk update for unicast */ + s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size, + GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + r_iter = s_rule; + list_for_each_entry(m_list_itr, m_list, list_entry) { + u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr; + + if (is_unicast_ether_addr(addr)) { + m_entry = ice_find_mac_entry(hw, addr); + if (!m_entry) { + status = ICE_ERR_DOES_NOT_EXIST; + goto ice_remove_mac_exit; + } + + ice_fill_sw_rule(hw, &m_entry->fltr_info, + r_iter, ice_aqc_opc_remove_sw_rules); + r_iter = (struct ice_aqc_sw_rules_elem *) + ((u8 *)r_iter + s_rule_size); + } + } + + /* Call AQ bulk switch rule update for all unicast addresses */ + r_iter = s_rule; + /* Call AQ switch rule in AQ_MAX chunk */ + for (total_elem_left = num_unicast; total_elem_left > 0; + total_elem_left -= elem_sent) { + struct ice_aqc_sw_rules_elem *entry = r_iter; + + elem_sent = min(total_elem_left, + (u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size)); + status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size, + elem_sent, ice_aqc_opc_remove_sw_rules, + NULL); + if (status) + break; + r_iter = (struct ice_aqc_sw_rules_elem *) + ((u8 *)r_iter + s_rule_size); + } + + list_for_each_entry(m_list_itr, m_list, list_entry) { + u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr; + + if (is_unicast_ether_addr(addr)) { + m_entry = ice_find_mac_entry(hw, addr); + if (!m_entry) + return ICE_ERR_OUT_OF_RANGE; + mutex_lock(&sw->mac_list_lock); + list_del(&m_entry->list_entry); + mutex_unlock(&sw->mac_list_lock); + devm_kfree(ice_hw_to_dev(hw), m_entry); + } + } + +ice_remove_mac_exit: + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_cfg_dflt_vsi - add filter rule to set/unset given VSI as default + * VSI for the switch (represented by swid) + * @hw: pointer to the hardware structure + * @vsi_id: number of VSI to set as default + * @set: true to add the above mentioned switch rule, false to remove it + * @direction: ICE_FLTR_RX or ICE_FLTR_TX + */ +enum ice_status +ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_id, bool set, u8 direction) +{ + struct ice_aqc_sw_rules_elem *s_rule; + struct ice_fltr_info f_info; + enum ice_adminq_opc opcode; + enum ice_status status; + u16 s_rule_size; + + s_rule_size = set ? 
ICE_SW_RULE_RX_TX_ETH_HDR_SIZE : + ICE_SW_RULE_RX_TX_NO_HDR_SIZE; + s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + memset(&f_info, 0, sizeof(f_info)); + + f_info.lkup_type = ICE_SW_LKUP_DFLT; + f_info.flag = direction; + f_info.fltr_act = ICE_FWD_TO_VSI; + f_info.fwd_id.vsi_id = vsi_id; + + if (f_info.flag & ICE_FLTR_RX) { + f_info.src = hw->port_info->lport; + if (!set) + f_info.fltr_rule_id = + hw->port_info->dflt_rx_vsi_rule_id; + } else if (f_info.flag & ICE_FLTR_TX) { + f_info.src = vsi_id; + if (!set) + f_info.fltr_rule_id = + hw->port_info->dflt_tx_vsi_rule_id; + } + + if (set) + opcode = ice_aqc_opc_add_sw_rules; + else + opcode = ice_aqc_opc_remove_sw_rules; + + ice_fill_sw_rule(hw, &f_info, s_rule, opcode); + + status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opcode, NULL); + if (status || !(f_info.flag & ICE_FLTR_TX_RX)) + goto out; + if (set) { + u16 index = le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + + if (f_info.flag & ICE_FLTR_TX) { + hw->port_info->dflt_tx_vsi_num = vsi_id; + hw->port_info->dflt_tx_vsi_rule_id = index; + } else if (f_info.flag & ICE_FLTR_RX) { + hw->port_info->dflt_rx_vsi_num = vsi_id; + hw->port_info->dflt_rx_vsi_rule_id = index; + } + } else { + if (f_info.flag & ICE_FLTR_TX) { + hw->port_info->dflt_tx_vsi_num = ICE_DFLT_VSI_INVAL; + hw->port_info->dflt_tx_vsi_rule_id = ICE_INVAL_ACT; + } else if (f_info.flag & ICE_FLTR_RX) { + hw->port_info->dflt_rx_vsi_num = ICE_DFLT_VSI_INVAL; + hw->port_info->dflt_rx_vsi_rule_id = ICE_INVAL_ACT; + } + } + +out: + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_remove_vlan_internal - Remove one VLAN based filter rule + * @hw: pointer to the hardware structure + * @f_entry: filter entry containing one VLAN information + */ +static enum ice_status +ice_remove_vlan_internal(struct ice_hw *hw, + struct ice_fltr_list_entry *f_entry) +{ + struct ice_fltr_info *new_fltr; + struct ice_fltr_mgmt_list_entry *v_list_elem; + u16 vsi_id; + + new_fltr = &f_entry->fltr_info; + + v_list_elem = ice_find_vlan_entry(hw, new_fltr->l_data.vlan.vlan_id); + if (!v_list_elem) + return ICE_ERR_PARAM; + + vsi_id = f_entry->fltr_info.fwd_id.vsi_id; + return ice_handle_rem_vsi_list_mgmt(hw, vsi_id, v_list_elem); +} + +/** + * ice_remove_vlan - Remove VLAN based filter rule + * @hw: pointer to the hardware structure + * @v_list: list of VLAN entries and forwarding information + */ +enum ice_status +ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list) +{ + struct ice_fltr_list_entry *v_list_itr; + enum ice_status status = 0; + + if (!v_list || !hw) + return ICE_ERR_PARAM; + + list_for_each_entry(v_list_itr, v_list, list_entry) { + status = ice_remove_vlan_internal(hw, v_list_itr); + if (status) { + v_list_itr->status = ICE_FLTR_STATUS_FW_FAIL; + return status; + } + v_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS; + } + return status; +} + +/** + * ice_add_to_vsi_fltr_list - Add VSI filters to the list + * @hw: pointer to the hardware structure + * @vsi_id: ID of VSI to remove filters from + * @lkup_list_head: pointer to the list that has certain lookup type filters + * @vsi_list_head: pointer to the list pertaining to VSI with vsi_id + */ +static enum ice_status +ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_id, + struct list_head *lkup_list_head, + struct list_head *vsi_list_head) +{ + struct ice_fltr_mgmt_list_entry *fm_entry; + + /* check to make sure VSI id is valid and within boundary */ + if (vsi_id >= + 
(sizeof(fm_entry->vsi_list_info->vsi_map) * BITS_PER_BYTE - 1)) + return ICE_ERR_PARAM; + + list_for_each_entry(fm_entry, lkup_list_head, list_entry) { + struct ice_fltr_info *fi; + + fi = &fm_entry->fltr_info; + if ((fi->fltr_act == ICE_FWD_TO_VSI && + fi->fwd_id.vsi_id == vsi_id) || + (fi->fltr_act == ICE_FWD_TO_VSI_LIST && + (test_bit(vsi_id, fm_entry->vsi_list_info->vsi_map)))) { + struct ice_fltr_list_entry *tmp; + + /* this memory is freed up in the caller function + * ice_remove_vsi_lkup_fltr() once filters for + * this VSI are removed + */ + tmp = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*tmp), + GFP_KERNEL); + if (!tmp) + return ICE_ERR_NO_MEMORY; + + memcpy(&tmp->fltr_info, fi, sizeof(*fi)); + + /* Expected below fields to be set to ICE_FWD_TO_VSI and + * the particular VSI id since we are only removing this + * one VSI + */ + if (fi->fltr_act == ICE_FWD_TO_VSI_LIST) { + tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; + tmp->fltr_info.fwd_id.vsi_id = vsi_id; + } + + list_add(&tmp->list_entry, vsi_list_head); + } + } + return 0; +} + +/** + * ice_remove_vsi_lkup_fltr - Remove lookup type filters for a VSI + * @hw: pointer to the hardware structure + * @vsi_id: ID of VSI to remove filters from + * @lkup: switch rule filter lookup type + */ +static void +ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_id, + enum ice_sw_lkup_type lkup) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_list_entry *fm_entry; + struct list_head remove_list_head; + struct ice_fltr_list_entry *tmp; + enum ice_status status; + + INIT_LIST_HEAD(&remove_list_head); + switch (lkup) { + case ICE_SW_LKUP_MAC: + mutex_lock(&sw->mac_list_lock); + status = ice_add_to_vsi_fltr_list(hw, vsi_id, + &sw->mac_list_head, + &remove_list_head); + mutex_unlock(&sw->mac_list_lock); + if (!status) { + ice_remove_mac(hw, &remove_list_head); + goto free_fltr_list; + } + break; + case ICE_SW_LKUP_VLAN: + mutex_lock(&sw->vlan_list_lock); + status = ice_add_to_vsi_fltr_list(hw, vsi_id, + &sw->vlan_list_head, + &remove_list_head); + mutex_unlock(&sw->vlan_list_lock); + if (!status) { + ice_remove_vlan(hw, &remove_list_head); + goto free_fltr_list; + } + break; + case ICE_SW_LKUP_MAC_VLAN: + case ICE_SW_LKUP_ETHERTYPE: + case ICE_SW_LKUP_ETHERTYPE_MAC: + case ICE_SW_LKUP_PROMISC: + case ICE_SW_LKUP_PROMISC_VLAN: + case ICE_SW_LKUP_DFLT: + ice_debug(hw, ICE_DBG_SW, + "Remove filters for this lookup type hasn't been implemented yet\n"); + break; + } + + return; +free_fltr_list: + list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) { + list_del(&fm_entry->list_entry); + devm_kfree(ice_hw_to_dev(hw), fm_entry); + } +} + +/** + * ice_remove_vsi_fltr - Remove all filters for a VSI + * @hw: pointer to the hardware structure + * @vsi_id: ID of VSI to remove filters from + */ +void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id) +{ + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_MAC); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_MAC_VLAN); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_PROMISC); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_VLAN); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_DFLT); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_ETHERTYPE); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_ETHERTYPE_MAC); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_PROMISC_VLAN); +} diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h new file mode 100644 index 000000000..9b8ec128e --- /dev/null +++ 
b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_SWITCH_H_ +#define _ICE_SWITCH_H_ + +#include "ice_common.h" + +#define ICE_SW_CFG_MAX_BUF_LEN 2048 +#define ICE_DFLT_VSI_INVAL 0xff +#define ICE_VSI_INVAL_ID 0xffff + +/* VSI context structure for add/get/update/free operations */ +struct ice_vsi_ctx { + u16 vsi_num; + u16 vsis_allocd; + u16 vsis_unallocated; + u16 flags; + struct ice_aqc_vsi_props info; + u8 alloc_from_pool; +}; + +enum ice_sw_fwd_act_type { + ICE_FWD_TO_VSI = 0, + ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */ + ICE_FWD_TO_Q, + ICE_FWD_TO_QGRP, + ICE_DROP_PACKET, + ICE_INVAL_ACT +}; + +/* Switch recipe ID enum values are specific to hardware */ +enum ice_sw_lkup_type { + ICE_SW_LKUP_ETHERTYPE = 0, + ICE_SW_LKUP_MAC = 1, + ICE_SW_LKUP_MAC_VLAN = 2, + ICE_SW_LKUP_PROMISC = 3, + ICE_SW_LKUP_VLAN = 4, + ICE_SW_LKUP_DFLT = 5, + ICE_SW_LKUP_ETHERTYPE_MAC = 8, + ICE_SW_LKUP_PROMISC_VLAN = 9, +}; + +struct ice_fltr_info { + /* Look up information: how to look up packet */ + enum ice_sw_lkup_type lkup_type; + /* Forward action: filter action to do after lookup */ + enum ice_sw_fwd_act_type fltr_act; + /* rule ID returned by firmware once filter rule is created */ + u16 fltr_rule_id; + u16 flag; +#define ICE_FLTR_RX BIT(0) +#define ICE_FLTR_TX BIT(1) +#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX) + + /* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */ + u16 src; + + union { + struct { + u8 mac_addr[ETH_ALEN]; + } mac; + struct { + u8 mac_addr[ETH_ALEN]; + u16 vlan_id; + } mac_vlan; + struct { + u16 vlan_id; + } vlan; + /* Set lkup_type as ICE_SW_LKUP_ETHERTYPE + * if just using ethertype as filter. Set lkup_type as + * ICE_SW_LKUP_ETHERTYPE_MAC if MAC also needs to be + * passed in as filter. + */ + struct { + u16 ethertype; + u8 mac_addr[ETH_ALEN]; /* optional */ + } ethertype_mac; + } l_data; + + /* Depending on filter action */ + union { + /* queue id in case of ICE_FWD_TO_Q and starting + * queue id in case of ICE_FWD_TO_QGRP. + */ + u16 q_id:11; + u16 vsi_id:10; + u16 vsi_list_id:10; + } fwd_id; + + /* Set to num_queues if action is ICE_FWD_TO_QGRP. This field + * determines the range of queues the packet needs to be forwarded to + */ + u8 qgrp_size; + + /* Rule creations populate these indicators basing on the switch type */ + u8 lb_en; /* Indicate if packet can be looped back */ + u8 lan_en; /* Indicate if packet can be forwarded to the uplink */ +}; + +/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list id */ +struct ice_vsi_list_map_info { + struct list_head list_entry; + DECLARE_BITMAP(vsi_map, ICE_MAX_VSI); + u16 vsi_list_id; +}; + +enum ice_sw_fltr_status { + ICE_FLTR_STATUS_NEW = 0, + ICE_FLTR_STATUS_FW_SUCCESS, + ICE_FLTR_STATUS_FW_FAIL, +}; + +struct ice_fltr_list_entry { + struct list_head list_entry; + enum ice_sw_fltr_status status; + struct ice_fltr_info fltr_info; +}; + +/* This defines an entry in the list that maintains MAC or VLAN membership + * to HW list mapping, since multiple VSIs can subscribe to the same MAC or + * VLAN. As an optimization the VSI list should be created only when a + * second VSI becomes a subscriber to the VLAN address. 
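+ * The first subscriber is expected to keep a plain ICE_FWD_TO_VSI rule;
+ * only when another VSI subscribes to the same MAC/VLAN does the rule get
+ * converted to ICE_FWD_TO_VSI_LIST backed by the shared vsi_list_info
+ * bookkeeping below.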
+ */ +struct ice_fltr_mgmt_list_entry { + /* back pointer to VSI list id to VSI list mapping */ + struct ice_vsi_list_map_info *vsi_list_info; + u16 vsi_count; +#define ICE_INVAL_LG_ACT_INDEX 0xffff + u16 lg_act_idx; +#define ICE_INVAL_SW_MARKER_ID 0xffff + u16 sw_marker_id; + struct list_head list_entry; + struct ice_fltr_info fltr_info; +#define ICE_INVAL_COUNTER_ID 0xff + u8 counter_index; +}; + +/* VSI related commands */ +enum ice_status +ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + struct ice_sq_cd *cd); +enum ice_status +ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + struct ice_sq_cd *cd); +enum ice_status +ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + bool keep_vsi_alloc, struct ice_sq_cd *cd); + +enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw); + +/* Switch/bridge related commands */ +enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst); +enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst); +void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id); +enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list); +enum ice_status ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list); +enum ice_status +ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_id, bool set, u8 direction); + +#endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c new file mode 100644 index 000000000..1d84fedf1 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -0,0 +1,1783 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +/* The driver transmit and receive code */ + +#include <linux/prefetch.h> +#include <linux/mm.h> +#include "ice.h" + +#define ICE_RX_HDR_SIZE 256 + +/** + * ice_unmap_and_free_tx_buf - Release a Tx buffer + * @ring: the ring that owns the buffer + * @tx_buf: the buffer to free + */ +static void +ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf) +{ + if (tx_buf->skb) { + dev_kfree_skb_any(tx_buf->skb); + if (dma_unmap_len(tx_buf, len)) + dma_unmap_single(ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + } else if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + } + + tx_buf->next_to_watch = NULL; + tx_buf->skb = NULL; + dma_unmap_len_set(tx_buf, len, 0); + /* tx_buf must be completely set up in the transmit path */ +} + +static struct netdev_queue *txring_txq(const struct ice_ring *ring) +{ + return netdev_get_tx_queue(ring->netdev, ring->q_index); +} + +/** + * ice_clean_tx_ring - Free any empty Tx buffers + * @tx_ring: ring to be cleaned + */ +void ice_clean_tx_ring(struct ice_ring *tx_ring) +{ + unsigned long size; + u16 i; + + /* ring already cleared, nothing to do */ + if (!tx_ring->tx_buf) + return; + + /* Free all the Tx ring sk_bufss */ + for (i = 0; i < tx_ring->count; i++) + ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]); + + size = sizeof(struct ice_tx_buf) * tx_ring->count; + memset(tx_ring->tx_buf, 0, size); + + /* Zero out the descriptor ring */ + memset(tx_ring->desc, 0, tx_ring->size); + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + + if (!tx_ring->netdev) + return; + + /* cleanup Tx queue statistics */ + netdev_tx_reset_queue(txring_txq(tx_ring)); +} + +/** + * ice_free_tx_ring - Free Tx resources per queue + * @tx_ring: Tx descriptor ring for a 
specific queue + * + * Free all transmit software resources + */ +void ice_free_tx_ring(struct ice_ring *tx_ring) +{ + ice_clean_tx_ring(tx_ring); + devm_kfree(tx_ring->dev, tx_ring->tx_buf); + tx_ring->tx_buf = NULL; + + if (tx_ring->desc) { + dmam_free_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc, tx_ring->dma); + tx_ring->desc = NULL; + } +} + +/** + * ice_clean_tx_irq - Reclaim resources after transmit completes + * @vsi: the VSI we care about + * @tx_ring: Tx ring to clean + * @napi_budget: Used to determine if we are in netpoll + * + * Returns true if there's any budget left (e.g. the clean is finished) + */ +static bool ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, + int napi_budget) +{ + unsigned int total_bytes = 0, total_pkts = 0; + unsigned int budget = vsi->work_lmt; + s16 i = tx_ring->next_to_clean; + struct ice_tx_desc *tx_desc; + struct ice_tx_buf *tx_buf; + + tx_buf = &tx_ring->tx_buf[i]; + tx_desc = ICE_TX_DESC(tx_ring, i); + i -= tx_ring->count; + + do { + struct ice_tx_desc *eop_desc = tx_buf->next_to_watch; + + /* if next_to_watch is not set then there is no work pending */ + if (!eop_desc) + break; + + smp_rmb(); /* prevent any other reads prior to eop_desc */ + + /* if the descriptor isn't done, no work yet to do */ + if (!(eop_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buf->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buf->bytecount; + total_pkts += tx_buf->gso_segs; + + /* free the skb */ + napi_consume_skb(tx_buf->skb, napi_budget); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + + /* clear tx_buf data */ + tx_buf->skb = NULL; + dma_unmap_len_set(tx_buf, len, 0); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buf++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buf = tx_ring->tx_buf; + tx_desc = ICE_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buf++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buf = tx_ring->tx_buf; + tx_desc = ICE_TX_DESC(tx_ring, 0); + } + + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + i += tx_ring->count; + tx_ring->next_to_clean = i; + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->stats.bytes += total_bytes; + tx_ring->stats.pkts += total_pkts; + u64_stats_update_end(&tx_ring->syncp); + tx_ring->q_vector->tx.total_bytes += total_bytes; + tx_ring->q_vector->tx.total_pkts += total_pkts; + + netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, + total_bytes); + +#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2)) + if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) && + (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { + /* Make sure that anybody stopping the queue after this + * sees the new next_to_clean. 
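+ * This barrier pairs with the smp_mb() in __ice_maybe_stop_tx() so
+ * that either this path sees the stopped subqueue or the stopping
+ * path sees the descriptors just reclaimed.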
+ */ + smp_mb(); + if (__netif_subqueue_stopped(tx_ring->netdev, + tx_ring->q_index) && + !test_bit(__ICE_DOWN, vsi->state)) { + netif_wake_subqueue(tx_ring->netdev, + tx_ring->q_index); + ++tx_ring->tx_stats.restart_q; + } + } + + return !!budget; +} + +/** + * ice_setup_tx_ring - Allocate the Tx descriptors + * @tx_ring: the tx ring to set up + * + * Return 0 on success, negative on error + */ +int ice_setup_tx_ring(struct ice_ring *tx_ring) +{ + struct device *dev = tx_ring->dev; + int bi_size; + + if (!dev) + return -ENOMEM; + + /* warn if we are about to overwrite the pointer */ + WARN_ON(tx_ring->tx_buf); + bi_size = sizeof(struct ice_tx_buf) * tx_ring->count; + tx_ring->tx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL); + if (!tx_ring->tx_buf) + return -ENOMEM; + + /* round up to nearest 4K */ + tx_ring->size = tx_ring->count * sizeof(struct ice_tx_desc); + tx_ring->size = ALIGN(tx_ring->size, 4096); + tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, + GFP_KERNEL); + if (!tx_ring->desc) { + dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", + tx_ring->size); + goto err; + } + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + return 0; + +err: + devm_kfree(dev, tx_ring->tx_buf); + tx_ring->tx_buf = NULL; + return -ENOMEM; +} + +/** + * ice_clean_rx_ring - Free Rx buffers + * @rx_ring: ring to be cleaned + */ +void ice_clean_rx_ring(struct ice_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + unsigned long size; + u16 i; + + /* ring already cleared, nothing to do */ + if (!rx_ring->rx_buf) + return; + + /* Free all the Rx ring sk_buffs */ + for (i = 0; i < rx_ring->count; i++) { + struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; + + if (rx_buf->skb) { + dev_kfree_skb(rx_buf->skb); + rx_buf->skb = NULL; + } + if (!rx_buf->page) + continue; + + dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE); + __free_pages(rx_buf->page, 0); + + rx_buf->page = NULL; + rx_buf->page_offset = 0; + } + + size = sizeof(struct ice_rx_buf) * rx_ring->count; + memset(rx_ring->rx_buf, 0, size); + + /* Zero out the descriptor ring */ + memset(rx_ring->desc, 0, rx_ring->size); + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; +} + +/** + * ice_free_rx_ring - Free Rx resources + * @rx_ring: ring to clean the resources from + * + * Free all receive software resources + */ +void ice_free_rx_ring(struct ice_ring *rx_ring) +{ + ice_clean_rx_ring(rx_ring); + devm_kfree(rx_ring->dev, rx_ring->rx_buf); + rx_ring->rx_buf = NULL; + + if (rx_ring->desc) { + dmam_free_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc, rx_ring->dma); + rx_ring->desc = NULL; + } +} + +/** + * ice_setup_rx_ring - Allocate the Rx descriptors + * @rx_ring: the rx ring to set up + * + * Return 0 on success, negative on error + */ +int ice_setup_rx_ring(struct ice_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + int bi_size; + + if (!dev) + return -ENOMEM; + + /* warn if we are about to overwrite the pointer */ + WARN_ON(rx_ring->rx_buf); + bi_size = sizeof(struct ice_rx_buf) * rx_ring->count; + rx_ring->rx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL); + if (!rx_ring->rx_buf) + return -ENOMEM; + + /* round up to nearest 4K */ + rx_ring->size = rx_ring->count * sizeof(union ice_32byte_rx_desc); + rx_ring->size = ALIGN(rx_ring->size, 4096); + rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, + GFP_KERNEL); + if (!rx_ring->desc) { + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, 
size=%d\n", + rx_ring->size); + goto err; + } + + rx_ring->next_to_use = 0; + rx_ring->next_to_clean = 0; + return 0; + +err: + devm_kfree(dev, rx_ring->rx_buf); + rx_ring->rx_buf = NULL; + return -ENOMEM; +} + +/** + * ice_release_rx_desc - Store the new tail and head values + * @rx_ring: ring to bump + * @val: new head index + */ +static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) +{ + rx_ring->next_to_use = val; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = val; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(val, rx_ring->tail); +} + +/** + * ice_alloc_mapped_page - recycle or make a new page + * @rx_ring: ring to use + * @bi: rx_buf struct to modify + * + * Returns true if the page was successfully allocated or + * reused. + */ +static bool ice_alloc_mapped_page(struct ice_ring *rx_ring, + struct ice_rx_buf *bi) +{ + struct page *page = bi->page; + dma_addr_t dma; + + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) { + rx_ring->rx_stats.page_reuse_count++; + return true; + } + + /* alloc new page for storage */ + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_page_failed++; + return false; + } + + /* map page for use */ + dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + + /* if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_pages(page, 0); + rx_ring->rx_stats.alloc_page_failed++; + return false; + } + + bi->dma = dma; + bi->page = page; + bi->page_offset = 0; + + return true; +} + +/** + * ice_alloc_rx_bufs - Replace used receive buffers + * @rx_ring: ring to place buffers on + * @cleaned_count: number of buffers to replace + * + * Returns false if all allocations were successful, true if any fail + */ +bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) +{ + union ice_32b_rx_flex_desc *rx_desc; + u16 ntu = rx_ring->next_to_use; + struct ice_rx_buf *bi; + + /* do nothing if no valid netdev defined */ + if (!rx_ring->netdev || !cleaned_count) + return false; + + /* get the RX descriptor and buffer based on next_to_use */ + rx_desc = ICE_RX_DESC(rx_ring, ntu); + bi = &rx_ring->rx_buf[ntu]; + + do { + if (!ice_alloc_mapped_page(rx_ring, bi)) + goto no_bufs; + + /* Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. 
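+ * The read format (pkt_addr) and the write-back format overlay the
+ * same descriptor memory, so the DMA address has to be re-written
+ * every time the descriptor is handed back to hardware.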
+ */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + + rx_desc++; + bi++; + ntu++; + if (unlikely(ntu == rx_ring->count)) { + rx_desc = ICE_RX_DESC(rx_ring, 0); + bi = rx_ring->rx_buf; + ntu = 0; + } + + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.status_error0 = 0; + + cleaned_count--; + } while (cleaned_count); + + if (rx_ring->next_to_use != ntu) + ice_release_rx_desc(rx_ring, ntu); + + return false; + +no_bufs: + if (rx_ring->next_to_use != ntu) + ice_release_rx_desc(rx_ring, ntu); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; +} + +/** + * ice_page_is_reserved - check if reuse is possible + * @page: page struct to check + */ +static bool ice_page_is_reserved(struct page *page) +{ + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); +} + +/** + * ice_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_buf: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buf to place the data into + * + * This function will add the data contained in rx_buf->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. + */ +static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb) +{ +#if (PAGE_SIZE < 8192) + unsigned int truesize = ICE_RXBUF_2048; +#else + unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048; + unsigned int truesize; +#endif /* PAGE_SIZE < 8192) */ + + struct page *page; + unsigned int size; + + size = le16_to_cpu(rx_desc->wb.pkt_len) & + ICE_RX_FLX_DESC_PKT_LEN_M; + + page = rx_buf->page; + +#if (PAGE_SIZE >= 8192) + truesize = ALIGN(size, L1_CACHE_BYTES); +#endif /* PAGE_SIZE >= 8192) */ + + /* will the data fit in the skb we allocated? if so, just + * copy it as it is pretty small anyway + */ + if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buf->page_offset; + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* page is not reserved, we can reuse buffer as-is */ + if (likely(!ice_page_is_reserved(page))) + return true; + + /* this page cannot be reused so discard it */ + __free_pages(page, 0); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buf->page_offset, size, truesize); + + /* avoid re-using remote pages */ + if (unlikely(ice_page_is_reserved(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buf->page_offset ^= truesize; +#else + /* move offset up to the next cache line */ + rx_buf->page_offset += truesize; + + if (rx_buf->page_offset > last_offset) + return false; +#endif /* PAGE_SIZE < 8192) */ + + /* Even if we own the page, we are not allowed to use atomic_set() + * This would break get_page_unless_zero() users. 
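+ * Take an extra reference with get_page() instead, so the half of
+ * the page kept for reuse and the half attached to the skb each
+ * hold their own reference.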
+ */ + get_page(rx_buf->page); + + return true; +} + +/** + * ice_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buf: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + */ +static void ice_reuse_rx_page(struct ice_ring *rx_ring, + struct ice_rx_buf *old_buf) +{ + u16 nta = rx_ring->next_to_alloc; + struct ice_rx_buf *new_buf; + + new_buf = &rx_ring->rx_buf[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + *new_buf = *old_buf; +} + +/** + * ice_fetch_rx_buf - Allocate skb and populate it + * @rx_ring: rx descriptor ring to transact packets on + * @rx_desc: descriptor containing info written by hardware + * + * This function allocates an skb on the fly, and populates it with the page + * data from the current receive descriptor, taking care to set up the skb + * correctly, as well as handling calling the page recycle function if + * necessary. + */ +static struct sk_buff *ice_fetch_rx_buf(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc) +{ + struct ice_rx_buf *rx_buf; + struct sk_buff *skb; + struct page *page; + + rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; + page = rx_buf->page; + prefetchw(page); + + skb = rx_buf->skb; + + if (likely(!skb)) { + u8 *page_addr = page_address(page) + rx_buf->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch((void *)(page_addr + L1_CACHE_BYTES)); +#endif /* L1_CACHE_BYTES */ + + /* allocate a skb to store the frags */ + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, + ICE_RX_HDR_SIZE, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_buf_failed++; + return NULL; + } + + /* we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + + skb_record_rx_queue(skb, rx_ring->q_index); + } else { + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, + rx_buf->page_offset, + ICE_RXBUF_2048, + DMA_FROM_DEVICE); + + rx_buf->skb = NULL; + } + + /* pull page into skb */ + if (ice_add_rx_frag(rx_buf, rx_desc, skb)) { + /* hand second half of page back to the ring */ + ice_reuse_rx_page(rx_ring, rx_buf); + rx_ring->rx_stats.page_reuse_count++; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE, + DMA_FROM_DEVICE); + } + + /* clear contents of buffer_info */ + rx_buf->page = NULL; + + return skb; +} + +/** + * ice_pull_tail - ice specific version of skb_pull_tail + * @skb: pointer to current skb being adjusted + * + * This function is an ice specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. 
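+ *
+ * As a rough sketch of the effect: for a frame received entirely in a
+ * page fragment, up to ICE_RX_HDR_SIZE bytes of headers are copied into
+ * the linear area and the fragment is shrunk by the same amount, so
+ * skb->len itself is unchanged.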
+ */ +static void ice_pull_tail(struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int pull_len; + unsigned char *va; + + /* it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + + /* we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} + +/** + * ice_cleanup_headers - Correct empty headers + * @skb: pointer to current skb being fixed + * + * Also address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + */ +static bool ice_cleanup_headers(struct sk_buff *skb) +{ + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + ice_pull_tail(skb); + + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; + + return false; +} + +/** + * ice_test_staterr - tests bits in Rx descriptor status and error fields + * @rx_desc: pointer to receive descriptor (in le64 format) + * @stat_err_bits: value to mask + * + * This function does some fast chicanery in order to return the + * value of the mask which is really only used for boolean tests. + * The status_error_len doesn't need to be shifted because it begins + * at offset zero. + */ +static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, + const u16 stat_err_bits) +{ + return !!(rx_desc->wb.status_error0 & + cpu_to_le16(stat_err_bits)); +} + +/** + * ice_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: Current socket buffer containing buffer in progress + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + */ +static bool ice_is_non_eop(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? 
ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(ICE_RX_DESC(rx_ring, ntc)); + + /* if we are the last buffer then there is nothing else to do */ +#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) + if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF))) + return false; + + /* place skb in next buffer to be received */ + rx_ring->rx_buf[ntc].skb = skb; + rx_ring->rx_stats.non_eop_descs++; + + return true; +} + +/** + * ice_ptype_to_htype - get a hash type + * @ptype: the ptype value from the descriptor + * + * Returns a hash type to be used by skb_set_hash + */ +static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype) +{ + return PKT_HASH_TYPE_NONE; +} + +/** + * ice_rx_hash - set the hash value in the skb + * @rx_ring: descriptor ring + * @rx_desc: specific descriptor + * @skb: pointer to current skb + * @rx_ptype: the ptype value from the descriptor + */ +static void +ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 rx_ptype) +{ + struct ice_32b_rx_flex_desc_nic *nic_mdid; + u32 hash; + + if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) + return; + + if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) + return; + + nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; + hash = le32_to_cpu(nic_mdid->rss_hash); + skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); +} + +/** + * ice_rx_csum - Indicate in skb if checksum is good + * @vsi: the VSI we care about + * @skb: skb currently being received and modified + * @rx_desc: the receive descriptor + * @ptype: the packet type decoded by hardware + * + * skb->protocol must be set before this function is called + */ +static void ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb, + union ice_32b_rx_flex_desc *rx_desc, u8 ptype) +{ + struct ice_rx_ptype_decoded decoded; + u32 rx_error, rx_status; + bool ipv4, ipv6; + + rx_status = le16_to_cpu(rx_desc->wb.status_error0); + rx_error = rx_status; + + decoded = ice_decode_rx_desc_ptype(ptype); + + /* Start with CHECKSUM_NONE and by default csum_level = 0 */ + skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); + + /* check if Rx checksum is enabled */ + if (!(vsi->netdev->features & NETIF_F_RXCSUM)) + return; + + /* check if HW has decoded the packet and checksum */ + if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) + return; + + if (!(decoded.known && decoded.outer_ip)) + return; + + ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); + + if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) + goto checksum_fail; + else if (ipv6 && (rx_status & + (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)))) + goto checksum_fail; + + /* check for L4 errors and handle packets that were not able to be + * checksummed due to arrival speed + */ + if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) + goto checksum_fail; + + /* Only report checksum unnecessary for TCP, UDP, or SCTP */ + switch (decoded.inner_prot) { + case ICE_RX_PTYPE_INNER_PROT_TCP: + case ICE_RX_PTYPE_INNER_PROT_UDP: + case ICE_RX_PTYPE_INNER_PROT_SCTP: + skb->ip_summed = CHECKSUM_UNNECESSARY; + default: + break; + } + return; + +checksum_fail: + vsi->back->hw_csum_rx_error++; +} + +/** + * ice_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * 
@rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * @ptype: the packet type decoded by hardware + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. + */ +static void ice_process_skb_fields(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 ptype) +{ + ice_rx_hash(rx_ring, rx_desc, skb, ptype); + + /* modifies the skb - consumes the enet header */ + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + + ice_rx_csum(rx_ring->vsi, skb, rx_desc, ptype); +} + +/** + * ice_receive_skb - Send a completed packet up the stack + * @rx_ring: rx ring in play + * @skb: packet to send up + * @vlan_tag: vlan tag for packet + * + * This function sends the completed packet (via. skb) up the stack using + * gro receive functions (with/without vlan tag) + */ +static void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, + u16 vlan_tag) +{ + if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + (vlan_tag & VLAN_VID_MASK)) { + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); + } + napi_gro_receive(&rx_ring->q_vector->napi, skb); +} + +/** + * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf + * @rx_ring: rx descriptor ring to transact packets on + * @budget: Total limit on number of packets to process + * + * This function provides a "bounce buffer" approach to Rx interrupt + * processing. The advantage to this is that on systems that have + * expensive overhead for IOMMU access this provides a means of avoiding + * it by maintaining the mapping of the page to the system. + * + * Returns amount of work completed + */ +static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) +{ + unsigned int total_rx_bytes = 0, total_rx_pkts = 0; + u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); + bool failure = false; + + /* start the loop to process RX packets bounded by 'budget' */ + while (likely(total_rx_pkts < (unsigned int)budget)) { + union ice_32b_rx_flex_desc *rx_desc; + struct sk_buff *skb; + u16 stat_err_bits; + u16 vlan_tag = 0; + u8 rx_ptype; + + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= ICE_RX_BUF_WRITE) { + failure = failure || + ice_alloc_rx_bufs(rx_ring, cleaned_count); + cleaned_count = 0; + } + + /* get the RX desc from RX ring based on 'next_to_clean' */ + rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); + + /* status_error_len will always be zero for unused descriptors + * because it's cleared in cleanup, and overlaps with hdr_addr + * which is always zero because packet split isn't used, if the + * hardware wrote DD then it will be non-zero + */ + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S); + if (!ice_test_staterr(rx_desc, stat_err_bits)) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * DD bit is set. 
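+ * dma_rmb() is sufficient here since we only need to order our
+ * reads against the device's descriptor write-back, not against
+ * other CPUs.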
+ */ + dma_rmb(); + + /* allocate (if needed) and populate skb */ + skb = ice_fetch_rx_buf(rx_ring, rx_desc); + if (!skb) + break; + + cleaned_count++; + + /* skip if it is NOP desc */ + if (ice_is_non_eop(rx_ring, rx_desc, skb)) + continue; + + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); + if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) { + dev_kfree_skb_any(skb); + continue; + } + + rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & + ICE_RX_FLEX_DESC_PTYPE_M; + + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); + if (ice_test_staterr(rx_desc, stat_err_bits)) + vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); + + /* correct empty headers and pad skb if needed (to make valid + * ethernet frame + */ + if (ice_cleanup_headers(skb)) { + skb = NULL; + continue; + } + + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; + + /* populate checksum, VLAN, and protocol */ + ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); + + /* send completed skb up the stack */ + ice_receive_skb(rx_ring, skb, vlan_tag); + + /* update budget accounting */ + total_rx_pkts++; + } + + /* update queue and vector specific stats */ + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->stats.pkts += total_rx_pkts; + rx_ring->stats.bytes += total_rx_bytes; + u64_stats_update_end(&rx_ring->syncp); + rx_ring->q_vector->rx.total_pkts += total_rx_pkts; + rx_ring->q_vector->rx.total_bytes += total_rx_bytes; + + /* guarantee a trip back through this routine if there was a failure */ + return failure ? budget : (int)total_rx_pkts; +} + +/** + * ice_napi_poll - NAPI polling Rx/Tx cleanup routine + * @napi: napi struct with our devices info in it + * @budget: amount of work driver is allowed to do this pass, in packets + * + * This function will clean all queues associated with a q_vector. + * + * Returns the amount of work done + */ +int ice_napi_poll(struct napi_struct *napi, int budget) +{ + struct ice_q_vector *q_vector = + container_of(napi, struct ice_q_vector, napi); + struct ice_vsi *vsi = q_vector->vsi; + struct ice_pf *pf = vsi->back; + bool clean_complete = true; + int budget_per_ring = 0; + struct ice_ring *ring; + int work_done = 0; + + /* Since the actual Tx work is minimal, we can give the Tx a larger + * budget and be more aggressive about cleaning up the Tx descriptors. + */ + ice_for_each_ring(ring, q_vector->tx) + if (!ice_clean_tx_irq(vsi, ring, budget)) + clean_complete = false; + + /* Handle case where we are called by netpoll with a budget of 0 */ + if (budget <= 0) + return budget; + + /* We attempt to distribute budget to each Rx queue fairly, but don't + * allow the budget to go below 1 because that would exit polling early. 
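+ * For example, with the default NAPI budget of 64 and two Rx rings
+ * on this vector, each ring is cleaned with a budget of 32.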
+ */ + if (q_vector->num_ring_rx) + budget_per_ring = max(budget / q_vector->num_ring_rx, 1); + + ice_for_each_ring(ring, q_vector->rx) { + int cleaned; + + cleaned = ice_clean_rx_irq(ring, budget_per_ring); + work_done += cleaned; + /* if we clean as many as budgeted, we must not be done */ + if (cleaned >= budget_per_ring) + clean_complete = false; + } + + /* If work not completed, return budget and polling will return */ + if (!clean_complete) + return budget; + + /* Work is done so exit the polling mode and re-enable the interrupt */ + napi_complete_done(napi, work_done); + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector); + + return min(work_done, budget - 1); +} + +/* helper function for building cmd/type/offset */ +static __le64 +build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) +{ + return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA | + (td_cmd << ICE_TXD_QW1_CMD_S) | + (td_offset << ICE_TXD_QW1_OFFSET_S) | + ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) | + (td_tag << ICE_TXD_QW1_L2TAG1_S)); +} + +/** + * __ice_maybe_stop_tx - 2nd level check for tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns -EBUSY if a stop is needed, else 0 + */ +static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) +{ + netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index); + /* Memory barrier before checking head and tail */ + smp_mb(); + + /* Check again in a case another CPU has just made room available. */ + if (likely(ICE_DESC_UNUSED(tx_ring) < size)) + return -EBUSY; + + /* A reprieve! - use start_subqueue because it doesn't call schedule */ + netif_start_subqueue(tx_ring->netdev, tx_ring->q_index); + ++tx_ring->tx_stats.restart_q; + return 0; +} + +/** + * ice_maybe_stop_tx - 1st level check for tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns 0 if stop is not needed + */ +static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) +{ + if (likely(ICE_DESC_UNUSED(tx_ring) >= size)) + return 0; + return __ice_maybe_stop_tx(tx_ring, size); +} + +/** + * ice_tx_map - Build the Tx descriptor + * @tx_ring: ring to send buffer on + * @first: first buffer info buffer to use + * @off: pointer to struct that holds offload parameters + * + * This function loops over the skb data pointed to by *first + * and gets a physical address for each memory location and programs + * it and the length into the transmit descriptor. 
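+ *
+ * Chunks larger than ICE_MAX_DATA_PER_TXD are split across several
+ * descriptors of roughly ICE_MAX_DATA_PER_TXD_ALIGNED bytes each; a
+ * 32 KB fragment, for instance, ends up consuming three data
+ * descriptors, matching the estimate from ice_txd_use_count().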
+ */ +static void +ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, + struct ice_tx_offload_params *off) +{ + u64 td_offset, td_tag, td_cmd; + u16 i = tx_ring->next_to_use; + struct skb_frag_struct *frag; + unsigned int data_len, size; + struct ice_tx_desc *tx_desc; + struct ice_tx_buf *tx_buf; + struct sk_buff *skb; + dma_addr_t dma; + + td_tag = off->td_l2tag1; + td_cmd = off->td_cmd; + td_offset = off->td_offset; + skb = first->skb; + + data_len = skb->data_len; + size = skb_headlen(skb); + + tx_desc = ICE_TX_DESC(tx_ring, i); + + if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) { + td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1; + td_tag = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >> + ICE_TX_FLAGS_VLAN_S; + } + + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + + tx_buf = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED; + + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + + /* align size to end of page */ + max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1); + tx_desc->buf_addr = cpu_to_le64(dma); + + /* account for data chunks larger than the hardware + * can handle + */ + while (unlikely(size > ICE_MAX_DATA_PER_TXD)) { + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, max_data, td_tag); + + tx_desc++; + i++; + + if (i == tx_ring->count) { + tx_desc = ICE_TX_DESC(tx_ring, 0); + i = 0; + } + + dma += max_data; + size -= max_data; + + max_data = ICE_MAX_DATA_PER_TXD_ALIGNED; + tx_desc->buf_addr = cpu_to_le64(dma); + } + + if (likely(!data_len)) + break; + + tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, + size, td_tag); + + tx_desc++; + i++; + + if (i == tx_ring->count) { + tx_desc = ICE_TX_DESC(tx_ring, 0); + i = 0; + } + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, + DMA_TO_DEVICE); + + tx_buf = &tx_ring->tx_buf[i]; + } + + /* record bytecount for BQL */ + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); + + /* record SW timestamp if HW timestamp is not available */ + skb_tx_timestamp(first->skb); + + i++; + if (i == tx_ring->count) + i = 0; + + /* write last descriptor with RS and EOP bits */ + td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS); + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag); + + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. + * + * We also use this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. 
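+ * Broadly this is paired on the read side by the smp_rmb() in
+ * ice_clean_tx_irq() after next_to_watch is fetched.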
+ */ + wmb(); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + tx_ring->next_to_use = i; + + ice_maybe_stop_tx(tx_ring, DESC_NEEDED); + + /* notify HW of packet */ + if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { + writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); + } + + return; + +dma_error: + /* clear dma mappings for failed tx_buf map */ + for (;;) { + tx_buf = &tx_ring->tx_buf[i]; + ice_unmap_and_free_tx_buf(tx_ring, tx_buf); + if (tx_buf == first) + break; + if (i == 0) + i = tx_ring->count; + i--; + } + + tx_ring->next_to_use = i; +} + +/** + * ice_tx_csum - Enable Tx checksum offloads + * @first: pointer to the first descriptor + * @off: pointer to struct that holds offload parameters + * + * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise. + */ +static +int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) +{ + u32 l4_len = 0, l3_len = 0, l2_len = 0; + struct sk_buff *skb = first->skb; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + unsigned char *hdr; + } l4; + __be16 frag_off, protocol; + unsigned char *exthdr; + u32 offset, cmd = 0; + u8 l4_proto = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + l2_len = ip.hdr - skb->data; + offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S; + + if (skb->encapsulation) + return -1; + + /* Enable IP checksum offloads */ + protocol = vlan_get_protocol(skb); + if (protocol == htons(ETH_P_IP)) { + l4_proto = ip.v4->protocol; + /* the stack computes the IP header already, the only time we + * need the hardware to recompute it is in the case of TSO. + */ + if (first->tx_flags & ICE_TX_FLAGS_TSO) + cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM; + else + cmd |= ICE_TX_DESC_CMD_IIPT_IPV4; + + } else if (protocol == htons(ETH_P_IPV6)) { + cmd |= ICE_TX_DESC_CMD_IIPT_IPV6; + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_proto, + &frag_off); + } else { + return -1; + } + + /* compute inner L3 header size */ + l3_len = l4.hdr - ip.hdr; + offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S; + + /* Enable L4 checksum offloads */ + switch (l4_proto) { + case IPPROTO_TCP: + /* enable checksum offloads */ + cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP; + l4_len = l4.tcp->doff; + offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S; + break; + case IPPROTO_UDP: + /* enable UDP checksum offload */ + cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP; + l4_len = (sizeof(struct udphdr) >> 2); + offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S; + break; + case IPPROTO_SCTP: + default: + if (first->tx_flags & ICE_TX_FLAGS_TSO) + return -1; + skb_checksum_help(skb); + return 0; + } + + off->td_cmd |= cmd; + off->td_offset |= offset; + return 1; +} + +/** + * ice_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW + * @tx_ring: ring to send buffer on + * @first: pointer to struct ice_tx_buf + * + * Checks the skb and set up correspondingly several generic transmit flags + * related to VLAN tagging for the HW, such as VLAN, DCB, etc. 
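+ * The 16-bit VLAN TCI ends up in the upper half of first->tx_flags
+ * (shifted by ICE_TX_FLAGS_VLAN_S) with either ICE_TX_FLAGS_HW_VLAN
+ * or ICE_TX_FLAGS_SW_VLAN set in the low bits to record which path
+ * was taken.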
+ * + * Returns error code indicate the frame should be dropped upon error and the + * otherwise returns 0 to indicate the flags has been set properly. + */ +static int +ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first) +{ + struct sk_buff *skb = first->skb; + __be16 protocol = skb->protocol; + + if (protocol == htons(ETH_P_8021Q) && + !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) { + /* when HW VLAN acceleration is turned off by the user the + * stack sets the protocol to 8021q so that the driver + * can take any steps required to support the SW only + * VLAN handling. In our case the driver doesn't need + * to take any further steps so just set the protocol + * to the encapsulated ethertype. + */ + skb->protocol = vlan_get_protocol(skb); + goto out; + } + + /* if we have a HW VLAN tag being added, default to the HW one */ + if (skb_vlan_tag_present(skb)) { + first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S; + first->tx_flags |= ICE_TX_FLAGS_HW_VLAN; + } else if (protocol == htons(ETH_P_8021Q)) { + struct vlan_hdr *vhdr, _vhdr; + + /* for SW VLAN, check the next protocol and store the tag */ + vhdr = (struct vlan_hdr *)skb_header_pointer(skb, ETH_HLEN, + sizeof(_vhdr), + &_vhdr); + if (!vhdr) + return -EINVAL; + + first->tx_flags |= ntohs(vhdr->h_vlan_TCI) << + ICE_TX_FLAGS_VLAN_S; + first->tx_flags |= ICE_TX_FLAGS_SW_VLAN; + } + +out: + return 0; +} + +/** + * ice_tso - computes mss and TSO length to prepare for TSO + * @first: pointer to struct ice_tx_buf + * @off: pointer to struct that holds offload parameters + * + * Returns 0 or error (negative) if TSO can't happen, 1 otherwise. + */ +static +int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) +{ + struct sk_buff *skb = first->skb; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + unsigned char *hdr; + } l4; + u64 cd_mss, cd_tso_len; + u32 paylen, l4_start; + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + + /* determine offset of transport header */ + l4_start = l4.hdr - skb->data; + + /* remove payload length from checksum */ + paylen = skb->len - l4_start; + csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); + + /* compute length of segmentation header */ + off->header_len = (l4.tcp->doff * 4) + l4_start; + + /* update gso_segs and bytecount */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount += (first->gso_segs - 1) * off->header_len; + + cd_tso_len = skb->len - off->header_len; + cd_mss = skb_shinfo(skb)->gso_size; + + /* record cdesc_qw1 with TSO parameters */ + off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX | + (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) | + (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) | + (cd_mss << ICE_TXD_CTX_QW1_MSS_S); + first->tx_flags |= ICE_TX_FLAGS_TSO; + return 1; +} + +/** + * ice_txd_use_count - estimate the number of descriptors needed for Tx + * @size: transmit request size in bytes + * + * Due to hardware alignment restrictions (4K alignment), we need to + * assume that we can have no more than 12K of data per descriptor, even + * though each descriptor can take up to 16K - 1 bytes of 
aligned memory. + * Thus, we need to divide by 12K. But division is slow! Instead, + * we decompose the operation into shifts and one relatively cheap + * multiply operation. + * + * To divide by 12K, we first divide by 4K, then divide by 3: + * To divide by 4K, shift right by 12 bits + * To divide by 3, multiply by 85, then divide by 256 + * (Divide by 256 is done by shifting right by 8 bits) + * Finally, we add one to round up. Because 256 isn't an exact multiple of + * 3, we'll underestimate near each multiple of 12K. This is actually more + * accurate as we have 4K - 1 of wiggle room that we can fit into the last + * segment. For our purposes this is accurate out to 1M which is orders of + * magnitude greater than our largest possible GSO size. + * + * This would then be implemented as: + * return (((size >> 12) * 85) >> 8) + 1; + * + * Since multiplication and division are commutative, we can reorder + * operations into: + * return ((size * 85) >> 20) + 1; + */ +static unsigned int ice_txd_use_count(unsigned int size) +{ + return ((size * 85) >> 20) + 1; +} + +/** + * ice_xmit_desc_count - calculate number of tx descriptors needed + * @skb: send buffer + * + * Returns number of data descriptors needed for this skb. + */ +static unsigned int ice_xmit_desc_count(struct sk_buff *skb) +{ + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + unsigned int count = 0, size = skb_headlen(skb); + + for (;;) { + count += ice_txd_use_count(size); + + if (!nr_frags--) + break; + + size = skb_frag_size(frag++); + } + + return count; +} + +/** + * __ice_chk_linearize - Check if there are more than 8 buffers per packet + * @skb: send buffer + * + * Note: This HW can't DMA more than 8 buffers to build a packet on the wire + * and so we need to figure out the cases where we need to linearize the skb. + * + * For TSO we need to count the TSO header and segment payload separately. + * As such we need to check cases where we have 7 fragments or more as we + * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for + * the segment payload in the first descriptor, and another 7 for the + * fragments. + */ +static bool __ice_chk_linearize(struct sk_buff *skb) +{ + const struct skb_frag_struct *frag, *stale; + int nr_frags, sum; + + /* no need to check if number of frags is less than 7 */ + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags < (ICE_MAX_BUF_TXD - 1)) + return false; + + /* We need to walk through the list and validate that each group + * of 6 fragments totals at least gso_size. + */ + nr_frags -= ICE_MAX_BUF_TXD - 2; + frag = &skb_shinfo(skb)->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We + * use this as the worst case scenerio in which the frag ahead + * of us only provides one byte which is why we are limited to 6 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - skb_shinfo(skb)->gso_size; + + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. 
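+ * In effect every window of six consecutive fragments is checked;
+ * if one of them cannot cover a full gso_size worth of payload,
+ * the segment would need a 9th descriptor and the skb has to be
+ * linearized.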
+ */ + stale = &skb_shinfo(skb)->frags[0]; + for (;;) { + sum += skb_frag_size(frag++); + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + if (!nr_frags--) + break; + + sum -= skb_frag_size(stale++); + } + + return false; +} + +/** + * ice_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @count: number of buffers used + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + */ +static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count) +{ + /* Both TSO and single send will work if count is less than 8 */ + if (likely(count < ICE_MAX_BUF_TXD)) + return false; + + if (skb_is_gso(skb)) + return __ice_chk_linearize(skb); + + /* we can support up to 8 data buffers for a single send */ + return count != ICE_MAX_BUF_TXD; +} + +/** + * ice_xmit_frame_ring - Sends buffer on Tx ring + * @skb: send buffer + * @tx_ring: ring to send buffer on + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +static netdev_tx_t +ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) +{ + struct ice_tx_offload_params offload = { 0 }; + struct ice_tx_buf *first; + unsigned int count; + int tso, csum; + + count = ice_xmit_desc_count(skb); + if (ice_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = ice_txd_use_count(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; + return NETDEV_TX_BUSY; + } + + offload.tx_ring = tx_ring; + + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buf[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); + first->gso_segs = 1; + first->tx_flags = 0; + + /* prepare the VLAN tagging flags for Tx */ + if (ice_tx_prepare_vlan_flags(tx_ring, first)) + goto out_drop; + + /* set up TSO offload */ + tso = ice_tso(first, &offload); + if (tso < 0) + goto out_drop; + + /* always set up Tx checksum offload */ + csum = ice_tx_csum(first, &offload); + if (csum < 0) + goto out_drop; + + if (tso || offload.cd_tunnel_params) { + struct ice_tx_ctx_desc *cdesc; + int i = tx_ring->next_to_use; + + /* grab the next descriptor */ + cdesc = ICE_TX_CTX_DESC(tx_ring, i); + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? 
i : 0; + + /* setup context descriptor */ + cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params); + cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2); + cdesc->rsvd = cpu_to_le16(0); + cdesc->qw1 = cpu_to_le64(offload.cd_qw1); + } + + ice_tx_map(tx_ring, first, &offload); + return NETDEV_TX_OK; + +out_drop: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** + * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_ring *tx_ring; + + tx_ring = vsi->tx_rings[skb->queue_mapping]; + + /* hardware can't handle really short frames, hardware padding works + * beyond this point + */ + if (skb_put_padto(skb, ICE_MIN_TX_LEN)) + return NETDEV_TX_OK; + + return ice_xmit_frame_ring(skb, tx_ring); +} diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h new file mode 100644 index 000000000..31bc998fe --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_TXRX_H_ +#define _ICE_TXRX_H_ + +#define ICE_DFLT_IRQ_WORK 256 +#define ICE_RXBUF_2048 2048 +#define ICE_MAX_CHAINED_RX_BUFS 5 +#define ICE_MAX_BUF_TXD 8 +#define ICE_MIN_TX_LEN 17 + +/* The size limit for a transmit buffer in a descriptor is (16K - 1). + * In order to align with the read requests we will align the value to + * the nearest 4K which represents our maximum read request size. + */ +#define ICE_MAX_READ_REQ_SIZE 4096 +#define ICE_MAX_DATA_PER_TXD (16 * 1024 - 1) +#define ICE_MAX_DATA_PER_TXD_ALIGNED \ + (~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD) + +#define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */ +#define ICE_MAX_TXQ_PER_TXQG 128 + +/* Tx Descriptors needed, worst case */ +#define DESC_NEEDED (MAX_SKB_FRAGS + 4) +#define ICE_DESC_UNUSED(R) \ + ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ + (R)->next_to_clean - (R)->next_to_use - 1) + +#define ICE_TX_FLAGS_TSO BIT(0) +#define ICE_TX_FLAGS_HW_VLAN BIT(1) +#define ICE_TX_FLAGS_SW_VLAN BIT(2) +#define ICE_TX_FLAGS_VLAN_M 0xffff0000 +#define ICE_TX_FLAGS_VLAN_S 16 + +struct ice_tx_buf { + struct ice_tx_desc *next_to_watch; + struct sk_buff *skb; + unsigned int bytecount; + unsigned short gso_segs; + u32 tx_flags; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); +}; + +struct ice_tx_offload_params { + u8 header_len; + u32 td_cmd; + u32 td_offset; + u32 td_l2tag1; + u16 cd_l2tag2; + u32 cd_tunnel_params; + u64 cd_qw1; + struct ice_ring *tx_ring; +}; + +struct ice_rx_buf { + struct sk_buff *skb; + dma_addr_t dma; + struct page *page; + unsigned int page_offset; +}; + +struct ice_q_stats { + u64 pkts; + u64 bytes; +}; + +struct ice_txq_stats { + u64 restart_q; + u64 tx_busy; + u64 tx_linearize; +}; + +struct ice_rxq_stats { + u64 non_eop_descs; + u64 alloc_page_failed; + u64 alloc_buf_failed; + u64 page_reuse_count; +}; + +/* this enum matches hardware bits and is meant to be used by DYN_CTLN + * registers and QINT registers or more generally anywhere in the manual + * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any + * register but instead is a special value meaning "don't update" ITR0/1/2. 
+ */ +enum ice_dyn_idx_t { + ICE_IDX_ITR0 = 0, + ICE_IDX_ITR1 = 1, + ICE_IDX_ITR2 = 2, + ICE_ITR_NONE = 3 /* ITR_NONE must not be used as an index */ +}; + +/* Header split modes defined by DTYPE field of Rx RLAN context */ +enum ice_rx_dtype { + ICE_RX_DTYPE_NO_SPLIT = 0, + ICE_RX_DTYPE_HEADER_SPLIT = 1, + ICE_RX_DTYPE_SPLIT_ALWAYS = 2, +}; + +/* indices into GLINT_ITR registers */ +#define ICE_RX_ITR ICE_IDX_ITR0 +#define ICE_TX_ITR ICE_IDX_ITR1 +#define ICE_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ +#define ICE_ITR_8K 0x003E + +/* apply ITR HW granularity translation to program the HW registers */ +#define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> (itr_gran)) + +/* Legacy or Advanced Mode Queue */ +#define ICE_TX_ADVANCED 0 +#define ICE_TX_LEGACY 1 + +/* descriptor ring, associated with a VSI */ +struct ice_ring { + struct ice_ring *next; /* pointer to next ring in q_vector */ + void *desc; /* Descriptor ring memory */ + struct device *dev; /* Used for DMA mapping */ + struct net_device *netdev; /* netdev ring maps to */ + struct ice_vsi *vsi; /* Backreference to associated VSI */ + struct ice_q_vector *q_vector; /* Backreference to associated vector */ + u8 __iomem *tail; + union { + struct ice_tx_buf *tx_buf; + struct ice_rx_buf *rx_buf; + }; + u16 q_index; /* Queue number of ring */ + u32 txq_teid; /* Added Tx queue TEID */ + + /* high bit set means dynamic, use accessor routines to read/write. + * hardware supports 2us/1us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. + */ + u16 rx_itr_setting; + u16 tx_itr_setting; + + u16 count; /* Number of descriptors */ + u16 reg_idx; /* HW register index of the ring */ + + /* used in interrupt processing */ + u16 next_to_use; + u16 next_to_clean; + + u8 ring_active; /* is ring online or not */ + + /* stats structs */ + struct ice_q_stats stats; + struct u64_stats_sync syncp; + union { + struct ice_txq_stats tx_stats; + struct ice_rxq_stats rx_stats; + }; + + unsigned int size; /* length of descriptor ring in bytes */ + dma_addr_t dma; /* physical address of ring */ + struct rcu_head rcu; /* to avoid race on free */ + u16 next_to_alloc; +} ____cacheline_internodealigned_in_smp; + +enum ice_latency_range { + ICE_LOWEST_LATENCY = 0, + ICE_LOW_LATENCY = 1, + ICE_BULK_LATENCY = 2, + ICE_ULTRA_LATENCY = 3, +}; + +struct ice_ring_container { + /* array of pointers to rings */ + struct ice_ring *ring; + unsigned int total_bytes; /* total bytes processed this int */ + unsigned int total_pkts; /* total packets processed this int */ + enum ice_latency_range latency_range; + u16 itr; +}; + +/* iterator for handling rings in ring container */ +#define ice_for_each_ring(pos, head) \ + for (pos = (head).ring; pos; pos = pos->next) + +bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count); +netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev); +void ice_clean_tx_ring(struct ice_ring *tx_ring); +void ice_clean_rx_ring(struct ice_ring *rx_ring); +int ice_setup_tx_ring(struct ice_ring *tx_ring); +int ice_setup_rx_ring(struct ice_ring *rx_ring); +void ice_free_tx_ring(struct ice_ring *tx_ring); +void ice_free_rx_ring(struct ice_ring *rx_ring); +int ice_napi_poll(struct napi_struct *napi, int budget); + +#endif /* _ICE_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h new file mode 100644 index 000000000..8105f11f0 --- /dev/null +++ 
b/drivers/net/ethernet/intel/ice/ice_type.h @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_TYPE_H_ +#define _ICE_TYPE_H_ + +#include "ice_status.h" +#include "ice_hw_autogen.h" +#include "ice_osdep.h" +#include "ice_controlq.h" +#include "ice_lan_tx_rx.h" + +#define ICE_BYTES_PER_WORD 2 +#define ICE_BYTES_PER_DWORD 4 + +static inline bool ice_is_tc_ena(u8 bitmap, u8 tc) +{ + return test_bit(tc, (unsigned long *)&bitmap); +} + +/* debug masks - set these bits in hw->debug_mask to control output */ +#define ICE_DBG_INIT BIT_ULL(1) +#define ICE_DBG_LINK BIT_ULL(4) +#define ICE_DBG_QCTX BIT_ULL(6) +#define ICE_DBG_NVM BIT_ULL(7) +#define ICE_DBG_LAN BIT_ULL(8) +#define ICE_DBG_SW BIT_ULL(13) +#define ICE_DBG_SCHED BIT_ULL(14) +#define ICE_DBG_RES BIT_ULL(17) +#define ICE_DBG_AQ_MSG BIT_ULL(24) +#define ICE_DBG_AQ_CMD BIT_ULL(27) +#define ICE_DBG_USER BIT_ULL(31) + +enum ice_aq_res_ids { + ICE_NVM_RES_ID = 1, + ICE_SPD_RES_ID, + ICE_CHANGE_LOCK_RES_ID, + ICE_GLOBAL_CFG_LOCK_RES_ID +}; + +/* FW update timeout definitions are in milliseconds */ +#define ICE_NVM_TIMEOUT 180000 +#define ICE_CHANGE_LOCK_TIMEOUT 1000 +#define ICE_GLOBAL_CFG_LOCK_TIMEOUT 5000 + +enum ice_aq_res_access_type { + ICE_RES_READ = 1, + ICE_RES_WRITE +}; + +enum ice_fc_mode { + ICE_FC_NONE = 0, + ICE_FC_RX_PAUSE, + ICE_FC_TX_PAUSE, + ICE_FC_FULL, + ICE_FC_PFC, + ICE_FC_DFLT +}; + +enum ice_set_fc_aq_failures { + ICE_SET_FC_AQ_FAIL_NONE = 0, + ICE_SET_FC_AQ_FAIL_GET, + ICE_SET_FC_AQ_FAIL_SET, + ICE_SET_FC_AQ_FAIL_UPDATE +}; + +/* Various MAC types */ +enum ice_mac_type { + ICE_MAC_UNKNOWN = 0, + ICE_MAC_GENERIC, +}; + +/* Media Types */ +enum ice_media_type { + ICE_MEDIA_UNKNOWN = 0, + ICE_MEDIA_FIBER, + ICE_MEDIA_BASET, + ICE_MEDIA_BACKPLANE, + ICE_MEDIA_DA, +}; + +enum ice_vsi_type { + ICE_VSI_PF = 0, +}; + +struct ice_link_status { + /* Refer to ice_aq_phy_type for bits definition */ + u64 phy_type_low; + u16 max_frame_size; + u16 link_speed; + u16 req_speeds; + u8 lse_ena; /* Link Status Event notification */ + u8 link_info; + u8 an_info; + u8 ext_info; + u8 pacing; + /* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of + * ice_aqc_get_phy_caps structure + */ + u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE]; +}; + +/* PHY info such as phy_type, etc... 
*/ +struct ice_phy_info { + struct ice_link_status link_info; + struct ice_link_status link_info_old; + u64 phy_type_low; + enum ice_media_type media_type; + u8 get_link_info; +}; + +/* Common HW capabilities for SW use */ +struct ice_hw_common_caps { + /* TX/RX queues */ + u16 num_rxq; /* Number/Total RX queues */ + u16 rxq_first_id; /* First queue ID for RX queues */ + u16 num_txq; /* Number/Total TX queues */ + u16 txq_first_id; /* First queue ID for TX queues */ + + /* MSI-X vectors */ + u16 num_msix_vectors; + u16 msix_vector_first_id; + + /* Max MTU for function or device */ + u16 max_mtu; + + /* RSS related capabilities */ + u16 rss_table_size; /* 512 for PFs and 64 for VFs */ + u8 rss_table_entry_width; /* RSS Entry width in bits */ +}; + +/* Function specific capabilities */ +struct ice_hw_func_caps { + struct ice_hw_common_caps common_cap; + u32 guaranteed_num_vsi; +}; + +/* Device wide capabilities */ +struct ice_hw_dev_caps { + struct ice_hw_common_caps common_cap; + u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */ +}; + +/* MAC info */ +struct ice_mac_info { + u8 lan_addr[ETH_ALEN]; + u8 perm_addr[ETH_ALEN]; +}; + +/* Various reset requests. These are not tied to HW reset types */ +enum ice_reset_req { + ICE_RESET_PFR = 0, + ICE_RESET_CORER = 1, + ICE_RESET_GLOBR = 2, +}; + +/* Bus parameters */ +struct ice_bus_info { + u16 device; + u8 func; +}; + +/* Flow control (FC) parameters */ +struct ice_fc_info { + enum ice_fc_mode current_mode; /* FC mode in effect */ + enum ice_fc_mode req_mode; /* FC mode requested by caller */ +}; + +/* NVM Information */ +struct ice_nvm_info { + u32 eetrack; /* NVM data version */ + u32 oem_ver; /* OEM version info */ + u16 sr_words; /* Shadow RAM size in words */ + u16 ver; /* NVM package version */ + u8 blank_nvm_mode; /* is NVM empty (no FW present) */ +}; + +/* Max number of port to queue branches w.r.t. topology */ +#define ICE_MAX_TRAFFIC_CLASS 8 +#define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS + +struct ice_sched_node { + struct ice_sched_node *parent; + struct ice_sched_node *sibling; /* next sibling in the same layer */ + struct ice_sched_node **children; + struct ice_aqc_txsched_elem_data info; + u32 agg_id; /* aggregator group id */ + u16 vsi_id; + u8 in_use; /* suspended or in use */ + u8 tx_sched_layer; /* Logical Layer (1-9) */ + u8 num_children; + u8 tc_num; + u8 owner; +#define ICE_SCHED_NODE_OWNER_LAN 0 +}; + +/* Access Macros for Tx Sched Elements data */ +#define ICE_TXSCHED_GET_NODE_TEID(x) le32_to_cpu((x)->info.node_teid) + +/* The aggregator type determines whether the identifier is for a VSI group, + * aggregator group, aggregator of queues, or queue group.
+ */ +enum ice_agg_type { + ICE_AGG_TYPE_UNKNOWN = 0, + ICE_AGG_TYPE_VSI, + ICE_AGG_TYPE_AGG, /* aggregator */ + ICE_AGG_TYPE_Q, + ICE_AGG_TYPE_QG +}; + +#define ICE_SCHED_DFLT_RL_PROF_ID 0 + +/* vsi type list entry to locate corresponding vsi/ag nodes */ +struct ice_sched_vsi_info { + struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS]; + struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS]; + struct list_head list_entry; + u16 max_lanq[ICE_MAX_TRAFFIC_CLASS]; + u16 vsi_id; +}; + +/* driver defines the policy */ +struct ice_sched_tx_policy { + u16 max_num_vsis; + u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS]; + u8 rdma_ena; +}; + +struct ice_port_info { + struct ice_sched_node *root; /* Root Node per Port */ + struct ice_hw *hw; /* back pointer to hw instance */ + u32 last_node_teid; /* scheduler last node info */ + u16 sw_id; /* Initial switch ID belongs to port */ + u16 pf_vf_num; + u8 port_state; +#define ICE_SCHED_PORT_STATE_INIT 0x0 +#define ICE_SCHED_PORT_STATE_READY 0x1 + u16 dflt_tx_vsi_rule_id; + u16 dflt_tx_vsi_num; + u16 dflt_rx_vsi_rule_id; + u16 dflt_rx_vsi_num; + struct ice_fc_info fc; + struct ice_mac_info mac; + struct ice_phy_info phy; + struct mutex sched_lock; /* protect access to TXSched tree */ + struct ice_sched_tx_policy sched_policy; + struct list_head vsi_info_list; + struct list_head agg_list; /* lists all aggregator */ + u8 lport; +#define ICE_LPORT_MASK 0xff + u8 is_vf; +}; + +struct ice_switch_info { + /* Switch VSI lists to MAC/VLAN translation */ + struct mutex mac_list_lock; /* protect MAC list */ + struct list_head mac_list_head; + struct mutex vlan_list_lock; /* protect VLAN list */ + struct list_head vlan_list_head; + struct mutex eth_m_list_lock; /* protect ethtype list */ + struct list_head eth_m_list_head; + struct mutex promisc_list_lock; /* protect promisc mode list */ + struct list_head promisc_list_head; + struct mutex mac_vlan_list_lock; /* protect MAC-VLAN list */ + struct list_head mac_vlan_list_head; + + struct list_head vsi_list_map_head; +}; + +/* Port hardware description */ +struct ice_hw { + u8 __iomem *hw_addr; + void *back; + struct ice_aqc_layer_props *layer_info; + struct ice_port_info *port_info; + u64 debug_mask; /* bitmap for debug mask */ + enum ice_mac_type mac_type; + + /* pci info */ + u16 device_id; + u16 vendor_id; + u16 subsystem_device_id; + u16 subsystem_vendor_id; + u8 revision_id; + + u8 pf_id; /* device profile info */ + + /* TX Scheduler values */ + u16 num_tx_sched_layers; + u16 num_tx_sched_phys_layers; + u8 flattened_layers; + u8 max_cgds; + u8 sw_entry_point_layer; + + u8 evb_veb; /* true for VEB, false for VEPA */ + u8 reset_ongoing; /* true if hw is in reset, false otherwise */ + struct ice_bus_info bus; + struct ice_nvm_info nvm; + struct ice_hw_dev_caps dev_caps; /* device capabilities */ + struct ice_hw_func_caps func_caps; /* function capabilities */ + + struct ice_switch_info *switch_info; /* switch filter lists */ + + /* Control Queue info */ + struct ice_ctl_q_info adminq; + + u8 api_branch; /* API branch version */ + u8 api_maj_ver; /* API major version */ + u8 api_min_ver; /* API minor version */ + u8 api_patch; /* API patch version */ + u8 fw_branch; /* firmware branch version */ + u8 fw_maj_ver; /* firmware major version */ + u8 fw_min_ver; /* firmware minor version */ + u8 fw_patch; /* firmware patch version */ + u32 fw_build; /* firmware build number */ + + /* minimum allowed value for different speeds */ +#define ICE_ITR_GRAN_MIN_200 1 +#define ICE_ITR_GRAN_MIN_100 1 +#define 
ICE_ITR_GRAN_MIN_50 2 +#define ICE_ITR_GRAN_MIN_25 4 + /* ITR granularity in 1 us */ + u8 itr_gran_200; + u8 itr_gran_100; + u8 itr_gran_50; + u8 itr_gran_25; + u8 ucast_shared; /* true if VSIs can share unicast addr */ + +}; + +/* Statistics collected by each port, VSI, VEB, and S-channel */ +struct ice_eth_stats { + u64 rx_bytes; /* gorc */ + u64 rx_unicast; /* uprc */ + u64 rx_multicast; /* mprc */ + u64 rx_broadcast; /* bprc */ + u64 rx_discards; /* rdpc */ + u64 rx_unknown_protocol; /* rupp */ + u64 tx_bytes; /* gotc */ + u64 tx_unicast; /* uptc */ + u64 tx_multicast; /* mptc */ + u64 tx_broadcast; /* bptc */ + u64 tx_discards; /* tdpc */ + u64 tx_errors; /* tepc */ +}; + +/* Statistics collected by the MAC */ +struct ice_hw_port_stats { + /* eth stats collected by the port */ + struct ice_eth_stats eth; + /* additional port specific stats */ + u64 tx_dropped_link_down; /* tdold */ + u64 crc_errors; /* crcerrs */ + u64 illegal_bytes; /* illerrc */ + u64 error_bytes; /* errbc */ + u64 mac_local_faults; /* mlfc */ + u64 mac_remote_faults; /* mrfc */ + u64 rx_len_errors; /* rlec */ + u64 link_xon_rx; /* lxonrxc */ + u64 link_xoff_rx; /* lxoffrxc */ + u64 link_xon_tx; /* lxontxc */ + u64 link_xoff_tx; /* lxofftxc */ + u64 rx_size_64; /* prc64 */ + u64 rx_size_127; /* prc127 */ + u64 rx_size_255; /* prc255 */ + u64 rx_size_511; /* prc511 */ + u64 rx_size_1023; /* prc1023 */ + u64 rx_size_1522; /* prc1522 */ + u64 rx_size_big; /* prc9522 */ + u64 rx_undersize; /* ruc */ + u64 rx_fragments; /* rfc */ + u64 rx_oversize; /* roc */ + u64 rx_jabber; /* rjc */ + u64 tx_size_64; /* ptc64 */ + u64 tx_size_127; /* ptc127 */ + u64 tx_size_255; /* ptc255 */ + u64 tx_size_511; /* ptc511 */ + u64 tx_size_1023; /* ptc1023 */ + u64 tx_size_1522; /* ptc1522 */ + u64 tx_size_big; /* ptc9522 */ +}; + +/* Checksum and Shadow RAM pointers */ +#define ICE_SR_NVM_DEV_STARTER_VER 0x18 +#define ICE_SR_NVM_EETRACK_LO 0x2D +#define ICE_SR_NVM_EETRACK_HI 0x2E +#define ICE_NVM_VER_LO_SHIFT 0 +#define ICE_NVM_VER_LO_MASK (0xff << ICE_NVM_VER_LO_SHIFT) +#define ICE_NVM_VER_HI_SHIFT 12 +#define ICE_NVM_VER_HI_MASK (0xf << ICE_NVM_VER_HI_SHIFT) +#define ICE_OEM_VER_PATCH_SHIFT 0 +#define ICE_OEM_VER_PATCH_MASK (0xff << ICE_OEM_VER_PATCH_SHIFT) +#define ICE_OEM_VER_BUILD_SHIFT 8 +#define ICE_OEM_VER_BUILD_MASK (0xffff << ICE_OEM_VER_BUILD_SHIFT) +#define ICE_OEM_VER_SHIFT 24 +#define ICE_OEM_VER_MASK (0xff << ICE_OEM_VER_SHIFT) +#define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800 +#define ICE_SR_WORDS_IN_1KB 512 + +#endif /* _ICE_TYPE_H_ */
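The ICE_NVM_VER_* and ICE_OEM_VER_* shift/mask macros at the end of ice_type.h describe how the NVM map version word and the 32-bit OEM version value are packed. The sketch below only illustrates how such masks would typically be applied when composing a human-readable firmware version string; the helper name example_nvm_ver_str, the userspace build, and the exact output format are assumptions made for this example and are not taken from the driver itself.

/* Standalone sketch: unpack NVM/OEM version fields using the same
 * shift/mask layout as ice_type.h (macros repeated here so the example
 * compiles on its own). The output format is an assumption.
 */
#include <stdio.h>

#define ICE_NVM_VER_LO_SHIFT	0
#define ICE_NVM_VER_LO_MASK	(0xff << ICE_NVM_VER_LO_SHIFT)
#define ICE_NVM_VER_HI_SHIFT	12
#define ICE_NVM_VER_HI_MASK	(0xf << ICE_NVM_VER_HI_SHIFT)
#define ICE_OEM_VER_PATCH_SHIFT	0
#define ICE_OEM_VER_PATCH_MASK	(0xff << ICE_OEM_VER_PATCH_SHIFT)
#define ICE_OEM_VER_BUILD_SHIFT	8
#define ICE_OEM_VER_BUILD_MASK	(0xffff << ICE_OEM_VER_BUILD_SHIFT)
#define ICE_OEM_VER_SHIFT	24
#define ICE_OEM_VER_MASK	(0xff << ICE_OEM_VER_SHIFT)

/* Hypothetical helper: format "nvm_hi.nvm_lo eetrack oem_major.build.patch" */
static void example_nvm_ver_str(unsigned int nvm_ver, unsigned int oem_ver,
				unsigned int eetrack, char *buf, size_t len)
{
	unsigned int nvm_hi = (nvm_ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT;
	unsigned int nvm_lo = (nvm_ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT;
	unsigned int oem_major = (oem_ver & ICE_OEM_VER_MASK) >> ICE_OEM_VER_SHIFT;
	unsigned int oem_build = (oem_ver & ICE_OEM_VER_BUILD_MASK) >> ICE_OEM_VER_BUILD_SHIFT;
	unsigned int oem_patch = (oem_ver & ICE_OEM_VER_PATCH_MASK) >> ICE_OEM_VER_PATCH_SHIFT;

	snprintf(buf, len, "%x.%02x 0x%08x %u.%u.%u",
		 nvm_hi, nvm_lo, eetrack, oem_major, oem_build, oem_patch);
}

int main(void)
{
	char ver[64];

	/* illustrative input values only */
	example_nvm_ver_str(0x1234, 0x010A0003, 0x80000a0b, ver, sizeof(ver));
	printf("%s\n", ver);	/* prints "1.34 0x80000a0b 1.2560.3" */
	return 0;
}

The point of the sketch is simply that each field is isolated with its MASK and right-shifted by its SHIFT; any real consumer of these macros (for example an ethtool firmware-version string) would follow the same mask-then-shift pattern, whatever its final formatting.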