author    Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
commit    ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree      b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/net/ethernet/amazon/ena
parent    Initial commit. (diff)
Adding upstream version 6.6.15. (tag: upstream/6.6.15)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/net/ethernet/amazon/ena')
-rw-r--r-- drivers/net/ethernet/amazon/ena/Makefile               8
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_admin_defs.h    1198
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_com.c           3001
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_com.h           1025
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_common_defs.h     21
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_eth_com.c        646
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_eth_com.h        261
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h    390
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_ethtool.c       1066
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_netdev.c        4832
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_netdev.h         467
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h      45
-rw-r--r-- drivers/net/ethernet/amazon/ena/ena_regs_defs.h      130
13 files changed, 13090 insertions, 0 deletions
diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile
new file mode 100644
index 0000000000..f1f752a8f7
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Elastic Network Adapter (ENA) device drivers.
+#
+
+obj-$(CONFIG_ENA_ETHERNET) += ena.o
+
+ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
new file mode 100644
index 0000000000..6de0d590be
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -0,0 +1,1198 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+#ifndef _ENA_ADMIN_H_
+#define _ENA_ADMIN_H_
+
+#define ENA_ADMIN_RSS_KEY_PARTS 10
+
+enum ena_admin_aq_opcode {
+ ENA_ADMIN_CREATE_SQ = 1,
+ ENA_ADMIN_DESTROY_SQ = 2,
+ ENA_ADMIN_CREATE_CQ = 3,
+ ENA_ADMIN_DESTROY_CQ = 4,
+ ENA_ADMIN_GET_FEATURE = 8,
+ ENA_ADMIN_SET_FEATURE = 9,
+ ENA_ADMIN_GET_STATS = 11,
+};
+
+enum ena_admin_aq_completion_status {
+ ENA_ADMIN_SUCCESS = 0,
+ ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1,
+ ENA_ADMIN_BAD_OPCODE = 2,
+ ENA_ADMIN_UNSUPPORTED_OPCODE = 3,
+ ENA_ADMIN_MALFORMED_REQUEST = 4,
+ /* Additional status is provided in ACQ entry extended_status */
+ ENA_ADMIN_ILLEGAL_PARAMETER = 5,
+ ENA_ADMIN_UNKNOWN_ERROR = 6,
+ ENA_ADMIN_RESOURCE_BUSY = 7,
+};
+
+/* subcommands for the set/get feature admin commands */
+enum ena_admin_aq_feature_id {
+ ENA_ADMIN_DEVICE_ATTRIBUTES = 1,
+ ENA_ADMIN_MAX_QUEUES_NUM = 2,
+ ENA_ADMIN_HW_HINTS = 3,
+ ENA_ADMIN_LLQ = 4,
+ ENA_ADMIN_MAX_QUEUES_EXT = 7,
+ ENA_ADMIN_RSS_HASH_FUNCTION = 10,
+ ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11,
+ ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG = 12,
+ ENA_ADMIN_MTU = 14,
+ ENA_ADMIN_RSS_HASH_INPUT = 18,
+ ENA_ADMIN_INTERRUPT_MODERATION = 20,
+ ENA_ADMIN_AENQ_CONFIG = 26,
+ ENA_ADMIN_LINK_CONFIG = 27,
+ ENA_ADMIN_HOST_ATTR_CONFIG = 28,
+ ENA_ADMIN_FEATURES_OPCODE_NUM = 32,
+};
+
+/* device capabilities */
+enum ena_admin_aq_caps_id {
+ ENA_ADMIN_ENI_STATS = 0,
+};
+
+enum ena_admin_placement_policy_type {
+ /* descriptors and headers are in host memory */
+ ENA_ADMIN_PLACEMENT_POLICY_HOST = 1,
+ /* descriptors and headers are in device memory (a.k.a Low Latency
+ * Queue)
+ */
+ ENA_ADMIN_PLACEMENT_POLICY_DEV = 3,
+};
+
+enum ena_admin_link_types {
+ ENA_ADMIN_LINK_SPEED_1G = 0x1,
+ ENA_ADMIN_LINK_SPEED_2_HALF_G = 0x2,
+ ENA_ADMIN_LINK_SPEED_5G = 0x4,
+ ENA_ADMIN_LINK_SPEED_10G = 0x8,
+ ENA_ADMIN_LINK_SPEED_25G = 0x10,
+ ENA_ADMIN_LINK_SPEED_40G = 0x20,
+ ENA_ADMIN_LINK_SPEED_50G = 0x40,
+ ENA_ADMIN_LINK_SPEED_100G = 0x80,
+ ENA_ADMIN_LINK_SPEED_200G = 0x100,
+ ENA_ADMIN_LINK_SPEED_400G = 0x200,
+};
+
+enum ena_admin_completion_policy_type {
+ /* completion queue entry for each sq descriptor */
+ ENA_ADMIN_COMPLETION_POLICY_DESC = 0,
+ /* completion queue entry upon request in sq descriptor */
+ ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND = 1,
+ /* current queue head pointer is updated in OS memory upon sq
+ * descriptor request
+ */
+ ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND = 2,
+ /* current queue head pointer is updated in OS memory for each sq
+ * descriptor
+ */
+ ENA_ADMIN_COMPLETION_POLICY_HEAD = 3,
+};
+
+/* basic stats return ena_admin_basic_stats while extended stats return a
+ * buffer (string format) with additional statistics per queue and per
+ * device id
+ */
+enum ena_admin_get_stats_type {
+ ENA_ADMIN_GET_STATS_TYPE_BASIC = 0,
+ ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1,
+ /* extra HW stats for specific network interface */
+ ENA_ADMIN_GET_STATS_TYPE_ENI = 2,
+};
+
+enum ena_admin_get_stats_scope {
+ ENA_ADMIN_SPECIFIC_QUEUE = 0,
+ ENA_ADMIN_ETH_TRAFFIC = 1,
+};
+
+struct ena_admin_aq_common_desc {
+ /* 11:0 : command_id
+ * 15:12 : reserved12
+ */
+ u16 command_id;
+
+ /* as appears in ena_admin_aq_opcode */
+ u8 opcode;
+
+ /* 0 : phase
+ * 1 : ctrl_data - control buffer address valid
+ * 2 : ctrl_data_indirect - control buffer address
+ * points to list of pages with addresses of control
+ * buffers
+ * 7:3 : reserved3
+ */
+ u8 flags;
+};
+
+/* used in ena_admin_aq_entry. Can point directly to control data, or to a
+ * page list chunk. Used also at the end of indirect mode page list chunks,
+ * for chaining.
+ */
+struct ena_admin_ctrl_buff_info {
+ u32 length;
+
+ struct ena_common_mem_addr address;
+};
+
+struct ena_admin_sq {
+ u16 sq_idx;
+
+ /* 4:0 : reserved
+ * 7:5 : sq_direction - 0x1 - Tx; 0x2 - Rx
+ */
+ u8 sq_identity;
+
+ u8 reserved1;
+};
+
+struct ena_admin_aq_entry {
+ struct ena_admin_aq_common_desc aq_common_descriptor;
+
+ union {
+ u32 inline_data_w1[3];
+
+ struct ena_admin_ctrl_buff_info control_buffer;
+ } u;
+
+ u32 inline_data_w4[12];
+};
+
+struct ena_admin_acq_common_desc {
+ /* command identifier to associate it with the aq descriptor
+ * 11:0 : command_id
+ * 15:12 : reserved12
+ */
+ u16 command;
+
+ u8 status;
+
+ /* 0 : phase
+ * 7:1 : reserved1
+ */
+ u8 flags;
+
+ u16 extended_status;
+
+ /* indicates to the driver which AQ entry has been consumed by the
+ * device and could be reused
+ */
+ u16 sq_head_indx;
+};
+
+struct ena_admin_acq_entry {
+ struct ena_admin_acq_common_desc acq_common_descriptor;
+
+ u32 response_specific_data[14];
+};
+
+struct ena_admin_aq_create_sq_cmd {
+ struct ena_admin_aq_common_desc aq_common_descriptor;
+
+ /* 4:0 : reserved0_w1
+ * 7:5 : sq_direction - 0x1 - Tx, 0x2 - Rx
+ */
+ u8 sq_identity;
+
+ u8 reserved8_w1;
+
+ /* 3:0 : placement_policy - Describing where the SQ
+ * descriptor ring and the SQ packet headers reside:
+ * 0x1 - descriptors and headers are in OS memory,
+ * 0x3 - descriptors and headers in device memory
+ * (a.k.a Low Latency Queue)
+ * 6:4 : completion_policy - Describing what policy
+ * to use for generating a completion entry (cqe) in
+ * the CQ associated with this SQ: 0x0 - cqe for each
+ * sq descriptor, 0x1 - cqe upon request in sq
+ * descriptor, 0x2 - current queue head pointer is
+ * updated in OS memory upon sq descriptor request
+ * 0x3 - current queue head pointer is updated in OS
+ * memory for each sq descriptor
+ * 7 : reserved15_w1
+ */
+ u8 sq_caps_2;
+
+ /* 0 : is_physically_contiguous - Describes whether the
+ * queue ring memory is allocated in physically
+ * contiguous pages or split.
+ * 7:1 : reserved17_w1
+ */
+ u8 sq_caps_3;
+
+ /* associated completion queue id. This CQ must be created prior to SQ
+ * creation
+ */
+ u16 cq_idx;
+
+ /* submission queue depth in entries */
+ u16 sq_depth;
+
+ /* SQ physical base address in OS memory. This field should not be
+ * used for Low Latency queues. Has to be page aligned.
+ */
+ struct ena_common_mem_addr sq_ba;
+
+ /* specifies queue head writeback location in OS memory. Valid if
+ * completion_policy is set to completion_policy_head_on_demand or
+ * completion_policy_head. Has to be cache aligned
+ */
+ struct ena_common_mem_addr sq_head_writeback;
+
+ u32 reserved0_w7;
+
+ u32 reserved0_w8;
+};
+
+enum ena_admin_sq_direction {
+ ENA_ADMIN_SQ_DIRECTION_TX = 1,
+ ENA_ADMIN_SQ_DIRECTION_RX = 2,
+};
+
+struct ena_admin_acq_create_sq_resp_desc {
+ struct ena_admin_acq_common_desc acq_common_desc;
+
+ u16 sq_idx;
+
+ u16 reserved;
+
+ /* queue doorbell address as an offset to PCIe MMIO REG BAR */
+ u32 sq_doorbell_offset;
+
+ /* low latency queue ring base address as an offset to PCIe MMIO
+ * LLQ_MEM BAR
+ */
+ u32 llq_descriptors_offset;
+
+ /* low latency queue headers' memory as an offset to PCIe MMIO
+ * LLQ_MEM BAR
+ */
+ u32 llq_headers_offset;
+};
+
+struct ena_admin_aq_destroy_sq_cmd {
+ struct ena_admin_aq_common_desc aq_common_descriptor;
+
+ struct ena_admin_sq sq;
+};
+
+struct ena_admin_acq_destroy_sq_resp_desc {
+ struct ena_admin_acq_common_desc acq_common_desc;
+};
+
+struct ena_admin_aq_create_cq_cmd {
+ struct ena_admin_aq_common_desc aq_common_descriptor;
+
+ /* 4:0 : reserved5
+ * 5 : interrupt_mode_enabled - if set, cq operates
+ * in interrupt mode, otherwise - polling
+ * 7:6 : reserved6
+ */
+ u8 cq_caps_1;
+
+ /* 4:0 : cq_entry_size_words - size of CQ entry in
+ * 32-bit words, valid values: 4, 8.
+ * 7:5 : reserved7
+ */
+ u8 cq_caps_2;
+
+ /* completion queue depth in # of entries. must be power of 2 */
+ u16 cq_depth;
+
+ /* msix vector assigned to this cq */
+ u32 msix_vector;
+
+ /* cq physical base address in OS memory. CQ must be physically
+ * contiguous
+ */
+ struct ena_common_mem_addr cq_ba;
+};
+
+struct ena_admin_acq_create_cq_resp_desc {
+ struct ena_admin_acq_common_desc acq_common_desc;
+
+ u16 cq_idx;
+
+ /* actual cq depth in number of entries */
+ u16 cq_actual_depth;
+
+ u32 numa_node_register_offset;
+
+ u32 cq_head_db_register_offset;
+
+ u32 cq_interrupt_unmask_register_offset;
+};
+
+struct ena_admin_aq_destroy_cq_cmd {
+ struct ena_admin_aq_common_desc aq_common_descriptor;
+
+ u16 cq_idx;
+
+ u16 reserved1;
+};
+
+struct ena_admin_acq_destroy_cq_resp_desc {
+ struct ena_admin_acq_common_desc acq_common_desc;
+};
+
+/* ENA AQ Get Statistics command. Extended statistics are placed in control
+ * buffer pointed by AQ entry
+ */
+struct ena_admin_aq_get_stats_cmd {
+ struct ena_admin_aq_common_desc aq_common_descriptor;
+
+ union {
+ /* command specific inline data */
+ u32 inline_data_w1[3];
+
+ struct ena_admin_ctrl_buff_info control_buffer;
+ } u;
+
+ /* stats type as defined in enum ena_admin_get_stats_type */
+ u8 type;
+
+ /* stats scope defined in enum ena_admin_get_stats_scope */
+ u8 scope;
+
+ u16 reserved3;
+
+ /* queue id. used when scope is specific_queue */
+ u16 queue_idx;
+
+ /* device id; value 0xFFFF means the local device. Only a privileged
+ * device can get the stats of another device
+ */
+ u16 device_id;
+};
+
+/* Basic Statistics Command. */
+struct ena_admin_basic_stats {
+ u32 tx_bytes_low;
+
+ u32 tx_bytes_high;
+
+ u32 tx_pkts_low;
+
+ u32 tx_pkts_high;
+
+ u32 rx_bytes_low;
+
+ u32 rx_bytes_high;
+
+ u32 rx_pkts_low;
+
+ u32 rx_pkts_high;
+
+ u32 rx_drops_low;
+
+ u32 rx_drops_high;
+
+ u32 tx_drops_low;
+
+ u32 tx_drops_high;
+};
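The 64-bit counters above are split into 32-bit low/high halves. A minimal sketch of how a consumer might recombine a pair (the helper name is illustrative, not part of this driver):

	static inline u64 ena_stats_to_u64(u32 low, u32 high)
	{
		/* high half holds bits 63:32, low half bits 31:0 */
		return ((u64)high << 32) | low;
	}

	/* e.g.: u64 tx_bytes = ena_stats_to_u64(s->tx_bytes_low, s->tx_bytes_high); */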
+
+/* ENI Statistics Command. */
+struct ena_admin_eni_stats {
+ /* The number of packets shaped due to inbound aggregate BW
+ * allowance being exceeded
+ */
+ u64 bw_in_allowance_exceeded;
+
+ /* The number of packets shaped due to outbound aggregate BW
+ * allowance being exceeded
+ */
+ u64 bw_out_allowance_exceeded;
+
+ /* The number of packets shaped due to PPS allowance being exceeded */
+ u64 pps_allowance_exceeded;
+
+ /* The number of packets shaped due to connection tracking
+ * allowance being exceeded and leading to failure in establishment
+ * of new connections
+ */
+ u64 conntrack_allowance_exceeded;
+
+ /* The number of packets shaped due to linklocal packet rate
+ * allowance being exceeded
+ */
+ u64 linklocal_allowance_exceeded;
+};
+
+struct ena_admin_acq_get_stats_resp {
+ struct ena_admin_acq_common_desc acq_common_desc;
+
+ union {
+ u64 raw[7];
+
+ struct ena_admin_basic_stats basic_stats;
+
+ struct ena_admin_eni_stats eni_stats;
+ } u;
+};
+
+struct ena_admin_get_set_feature_common_desc {
+ /* 1:0 : select - 0x1 - current value; 0x3 - default
+ * value
+ * 7:3 : reserved3
+ */
+ u8 flags;
+
+ /* as appears in ena_admin_aq_feature_id */
+ u8 feature_id;
+
+ /* The driver specifies the max feature version it supports and the
+ * device responds with the currently supported feature version. The
+ * field is zero based
+ */
+ u8 feature_version;
+
+ u8 reserved8;
+};
+
+struct ena_admin_device_attr_feature_desc {
+ u32 impl_id;
+
+ u32 device_version;
+
+ /* bitmap of ena_admin_aq_feature_id, which represents supported
+ * subcommands for the set/get feature admin commands.
+ */
+ u32 supported_features;
+
+ /* bitmap of ena_admin_aq_caps_id, which represents device
+ * capabilities.
+ */
+ u32 capabilities;
+
+ /* Indicates how many bits are used for physical address access. */
+ u32 phys_addr_width;
+
+ /* Indicates how many bits are used for virtual address access. */
+ u32 virt_addr_width;
+
+ /* unicast MAC address (in Network byte order) */
+ u8 mac_addr[6];
+
+ u8 reserved7[2];
+
+ u32 max_mtu;
+};
+
+enum ena_admin_llq_header_location {
+ /* header is in descriptor list */
+ ENA_ADMIN_INLINE_HEADER = 1,
+ /* header in a separate ring, implies 16B descriptor list entry */
+ ENA_ADMIN_HEADER_RING = 2,
+};
+
+enum ena_admin_llq_ring_entry_size {
+ ENA_ADMIN_LIST_ENTRY_SIZE_128B = 1,
+ ENA_ADMIN_LIST_ENTRY_SIZE_192B = 2,
+ ENA_ADMIN_LIST_ENTRY_SIZE_256B = 4,
+};
+
+enum ena_admin_llq_num_descs_before_header {
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_0 = 0,
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1 = 1,
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2 = 2,
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4 = 4,
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8 = 8,
+};
+
+/* packet descriptor list entry always starts with one or more descriptors,
+ * followed by a header. The rest of the descriptors are located in the
+ * beginning of the subsequent entry. Stride refers to how the rest of the
+ * descriptors are placed. This field is relevant only for inline header
+ * mode
+ */
+enum ena_admin_llq_stride_ctrl {
+ ENA_ADMIN_SINGLE_DESC_PER_ENTRY = 1,
+ ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY = 2,
+};
+
+enum ena_admin_accel_mode_feat {
+ ENA_ADMIN_DISABLE_META_CACHING = 0,
+ ENA_ADMIN_LIMIT_TX_BURST = 1,
+};
+
+struct ena_admin_accel_mode_get {
+ /* bit field of enum ena_admin_accel_mode_feat */
+ u16 supported_flags;
+
+ /* maximum burst size between two doorbells. The size is in bytes */
+ u16 max_tx_burst_size;
+};
+
+struct ena_admin_accel_mode_set {
+ /* bit field of enum ena_admin_accel_mode_feat */
+ u16 enabled_flags;
+
+ u16 reserved;
+};
+
+struct ena_admin_accel_mode_req {
+ union {
+ u32 raw[2];
+
+ struct ena_admin_accel_mode_get get;
+
+ struct ena_admin_accel_mode_set set;
+ } u;
+};
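The get and set views carry the same feature bits, so a driver can probe supported_flags before requesting a mode; ena_com_config_llq_info() in ena_com.c below does the equivalent for LLQ. A hedged sketch, assuming get was copied out of the device's feature response:

	/* Illustrative: request Tx burst limiting only if the device reports it */
	struct ena_admin_accel_mode_set set = {};

	if (get.supported_flags & BIT(ENA_ADMIN_LIMIT_TX_BURST))
		set.enabled_flags |= BIT(ENA_ADMIN_LIMIT_TX_BURST);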
+
+struct ena_admin_feature_llq_desc {
+ u32 max_llq_num;
+
+ u32 max_llq_depth;
+
+ /* specify the header locations the device supports. bitfield of enum
+ * ena_admin_llq_header_location.
+ */
+ u16 header_location_ctrl_supported;
+
+ /* the header location the driver selected to use. */
+ u16 header_location_ctrl_enabled;
+
+ /* if inline header is specified - this is the size of descriptor list
+ * entry. If header in a separate ring is specified - this is the size
+ * of header ring entry. A bitfield of enum ena_admin_llq_ring_entry_size
+ * specifying the entry sizes the device supports
+ */
+ u16 entry_size_ctrl_supported;
+
+ /* the entry size the driver selected to use. */
+ u16 entry_size_ctrl_enabled;
+
+ /* valid only if inline header is specified. The first entry associated
+ * with the packet includes descriptors and header. The rest of the
+ * entries are occupied by descriptors. This parameter defines the max
+ * number of descriptors preceding the header in the first entry. The
+ * field is a bitfield of enum ena_admin_llq_num_descs_before_header
+ * and specifies the values the device supports
+ */
+ u16 desc_num_before_header_supported;
+
+ /* the value the driver selected to use */
+ u16 desc_num_before_header_enabled;
+
+ /* valid only if inline was chosen. bitfield of enum
+ * ena_admin_llq_stride_ctrl
+ */
+ u16 descriptors_stride_ctrl_supported;
+
+ /* the stride control the driver selected to use */
+ u16 descriptors_stride_ctrl_enabled;
+
+ /* reserved */
+ u32 reserved1;
+
+ /* accelerated low latency queue requirements. The driver needs to
+ * support these requirements in order to use the accelerated llq
+ */
+ struct ena_admin_accel_mode_req accel_mode;
+};
+
+struct ena_admin_queue_ext_feature_fields {
+ u32 max_tx_sq_num;
+
+ u32 max_tx_cq_num;
+
+ u32 max_rx_sq_num;
+
+ u32 max_rx_cq_num;
+
+ u32 max_tx_sq_depth;
+
+ u32 max_tx_cq_depth;
+
+ u32 max_rx_sq_depth;
+
+ u32 max_rx_cq_depth;
+
+ u32 max_tx_header_size;
+
+ /* Maximum Descriptors number, including meta descriptor, allowed for a
+ * single Tx packet
+ */
+ u16 max_per_packet_tx_descs;
+
+ /* Maximum Descriptors number allowed for a single Rx packet */
+ u16 max_per_packet_rx_descs;
+};
+
+struct ena_admin_queue_feature_desc {
+ u32 max_sq_num;
+
+ u32 max_sq_depth;
+
+ u32 max_cq_num;
+
+ u32 max_cq_depth;
+
+ u32 max_legacy_llq_num;
+
+ u32 max_legacy_llq_depth;
+
+ u32 max_header_size;
+
+ /* Maximum Descriptors number, including meta descriptor, allowed for a
+ * single Tx packet
+ */
+ u16 max_packet_tx_descs;
+
+ /* Maximum Descriptors number allowed for a single Rx packet */
+ u16 max_packet_rx_descs;
+};
+
+struct ena_admin_set_feature_mtu_desc {
+ /* exclude L2 */
+ u32 mtu;
+};
+
+struct ena_admin_set_feature_host_attr_desc {
+ /* host OS info base address in OS memory. host info is 4KB of
+ * physically contiguous memory
+ */
+ struct ena_common_mem_addr os_info_ba;
+
+ /* host debug area base address in OS memory. debug area must be
+ * physically contiguous
+ */
+ struct ena_common_mem_addr debug_ba;
+
+ /* debug area size */
+ u32 debug_area_size;
+};
+
+struct ena_admin_feature_intr_moder_desc {
+ /* interrupt delay granularity in usec */
+ u16 intr_delay_resolution;
+
+ u16 reserved;
+};
+
+struct ena_admin_get_feature_link_desc {
+ /* Link speed in Mb */
+ u32 speed;
+
+ /* bit field of enum ena_admin_link types */
+ u32 supported;
+
+ /* 0 : autoneg
+ * 1 : duplex - Full Duplex
+ * 31:2 : reserved2
+ */
+ u32 flags;
+};
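supported is a bitmask over enum ena_admin_link_types, so individual speeds are tested bit-wise. An illustrative check, assuming link points at a filled-in descriptor:

	/* Illustrative: does the device report 25G among the supported speeds? */
	bool has_25g = !!(link->supported & ENA_ADMIN_LINK_SPEED_25G);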
+
+struct ena_admin_feature_aenq_desc {
+ /* bitmask for AENQ groups the device can report */
+ u32 supported_groups;
+
+ /* bitmask for AENQ groups to report */
+ u32 enabled_groups;
+};
+
+struct ena_admin_feature_offload_desc {
+ /* 0 : TX_L3_csum_ipv4
+ * 1 : TX_L4_ipv4_csum_part - The checksum field
+ * should be initialized with pseudo header checksum
+ * 2 : TX_L4_ipv4_csum_full
+ * 3 : TX_L4_ipv6_csum_part - The checksum field
+ * should be initialized with pseudo header checksum
+ * 4 : TX_L4_ipv6_csum_full
+ * 5 : tso_ipv4
+ * 6 : tso_ipv6
+ * 7 : tso_ecn
+ */
+ u32 tx;
+
+ /* Receive side supported stateless offload
+ * 0 : RX_L3_csum_ipv4 - IPv4 checksum
+ * 1 : RX_L4_ipv4_csum - TCP/UDP/IPv4 checksum
+ * 2 : RX_L4_ipv6_csum - TCP/UDP/IPv6 checksum
+ * 3 : RX_hash - Hash calculation
+ */
+ u32 rx_supported;
+
+ u32 rx_enabled;
+};
+
+enum ena_admin_hash_functions {
+ ENA_ADMIN_TOEPLITZ = 1,
+ ENA_ADMIN_CRC32 = 2,
+};
+
+struct ena_admin_feature_rss_flow_hash_control {
+ u32 key_parts;
+
+ u32 reserved;
+
+ u32 key[ENA_ADMIN_RSS_KEY_PARTS];
+};
+
+struct ena_admin_feature_rss_flow_hash_function {
+ /* 7:0 : funcs - bitmask of ena_admin_hash_functions */
+ u32 supported_func;
+
+ /* 7:0 : selected_func - bitmask of
+ * ena_admin_hash_functions
+ */
+ u32 selected_func;
+
+ /* initial value */
+ u32 init_val;
+};
+
+/* RSS flow hash protocols */
+enum ena_admin_flow_hash_proto {
+ ENA_ADMIN_RSS_TCP4 = 0,
+ ENA_ADMIN_RSS_UDP4 = 1,
+ ENA_ADMIN_RSS_TCP6 = 2,
+ ENA_ADMIN_RSS_UDP6 = 3,
+ ENA_ADMIN_RSS_IP4 = 4,
+ ENA_ADMIN_RSS_IP6 = 5,
+ ENA_ADMIN_RSS_IP4_FRAG = 6,
+ ENA_ADMIN_RSS_NOT_IP = 7,
+ /* TCPv6 with extension header */
+ ENA_ADMIN_RSS_TCP6_EX = 8,
+ /* IPv6 with extension header */
+ ENA_ADMIN_RSS_IP6_EX = 9,
+ ENA_ADMIN_RSS_PROTO_NUM = 16,
+};
+
+/* RSS flow hash fields */
+enum ena_admin_flow_hash_fields {
+ /* Ethernet Dest Addr */
+ ENA_ADMIN_RSS_L2_DA = BIT(0),
+ /* Ethernet Src Addr */
+ ENA_ADMIN_RSS_L2_SA = BIT(1),
+ /* ipv4/6 Dest Addr */
+ ENA_ADMIN_RSS_L3_DA = BIT(2),
+ /* ipv4/6 Src Addr */
+ ENA_ADMIN_RSS_L3_SA = BIT(3),
+ /* tcp/udp Dest Port */
+ ENA_ADMIN_RSS_L4_DP = BIT(4),
+ /* tcp/udp Src Port */
+ ENA_ADMIN_RSS_L4_SP = BIT(5),
+};
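These bits are OR-ed together to form the fields member of struct ena_admin_proto_input below. For example, a classic 4-tuple hash input for TCP/IPv4 would be (illustrative):

	u16 fields = ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
		     ENA_ADMIN_RSS_L4_SP | ENA_ADMIN_RSS_L4_DP;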
+
+struct ena_admin_proto_input {
+ /* flow hash fields (bitwise according to ena_admin_flow_hash_fields) */
+ u16 fields;
+
+ u16 reserved2;
+};
+
+struct ena_admin_feature_rss_hash_control {
+ struct ena_admin_proto_input supported_fields[ENA_ADMIN_RSS_PROTO_NUM];
+
+ struct ena_admin_proto_input selected_fields[ENA_ADMIN_RSS_PROTO_NUM];
+
+ struct ena_admin_proto_input reserved2[ENA_ADMIN_RSS_PROTO_NUM];
+
+ struct ena_admin_proto_input reserved3[ENA_ADMIN_RSS_PROTO_NUM];
+};
+
+struct ena_admin_feature_rss_flow_hash_input {
+ /* supported hash input sorting
+ * 1 : L3_sort - support swapping L3 addresses if DA
+ * is smaller than SA
+ * 2 : L4_sort - support swapping L4 ports if DP is
+ * smaller than SP
+ */
+ u16 supported_input_sort;
+
+ /* enabled hash input sorting
+ * 1 : enable_L3_sort - enable swapping L3 addresses
+ * if DA is smaller than SA
+ * 2 : enable_L4_sort - enable swapping L4 ports if
+ * DP is smaller than SP
+ */
+ u16 enabled_input_sort;
+};
+
+enum ena_admin_os_type {
+ ENA_ADMIN_OS_LINUX = 1,
+ ENA_ADMIN_OS_WIN = 2,
+ ENA_ADMIN_OS_DPDK = 3,
+ ENA_ADMIN_OS_FREEBSD = 4,
+ ENA_ADMIN_OS_IPXE = 5,
+ ENA_ADMIN_OS_ESXI = 6,
+ ENA_ADMIN_OS_GROUPS_NUM = 6,
+};
+
+struct ena_admin_host_info {
+ /* defined in enum ena_admin_os_type */
+ u32 os_type;
+
+ /* os distribution string format */
+ u8 os_dist_str[128];
+
+ /* OS distribution numeric format */
+ u32 os_dist;
+
+ /* kernel version string format */
+ u8 kernel_ver_str[32];
+
+ /* Kernel version numeric format */
+ u32 kernel_ver;
+
+ /* 7:0 : major
+ * 15:8 : minor
+ * 23:16 : sub_minor
+ * 31:24 : module_type
+ */
+ u32 driver_version;
+
+ /* features bitmap */
+ u32 supported_network_features[2];
+
+ /* ENA spec version of driver */
+ u16 ena_spec_version;
+
+ /* ENA device's Bus, Device and Function
+ * 2:0 : function
+ * 7:3 : device
+ * 15:8 : bus
+ */
+ u16 bdf;
+
+ /* Number of CPUs */
+ u16 num_cpus;
+
+ u16 reserved;
+
+ /* 0 : reserved
+ * 1 : rx_offset
+ * 2 : interrupt_moderation
+ * 3 : rx_buf_mirroring
+ * 4 : rss_configurable_function_key
+ * 5 : reserved
+ * 6 : rx_page_reuse
+ * 31:7 : reserved
+ */
+ u32 driver_supported_features;
+};
+
+struct ena_admin_rss_ind_table_entry {
+ u16 cq_idx;
+
+ u16 reserved;
+};
+
+struct ena_admin_feature_rss_ind_table {
+ /* min supported table size (2^min_size) */
+ u16 min_size;
+
+ /* max supported table size (2^max_size) */
+ u16 max_size;
+
+ /* table size (2^size) */
+ u16 size;
+
+ u16 reserved;
+
+ /* index of the inline entry. 0xFFFFFFFF means invalid */
+ u32 inline_index;
+
+ /* used for updating single entry, ignored when setting the entire
+ * table through the control buffer.
+ */
+ struct ena_admin_rss_ind_table_entry inline_entry;
+};
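The three size fields are log2 values, so the actual entry count is a power of two. A one-line sketch, assuming ind_table points at a filled-in descriptor:

	u32 num_entries = 1U << ind_table->size;	/* e.g. size == 7 -> 128 entries */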
+
+/* When hint value is 0, driver should use its own predefined value */
+struct ena_admin_ena_hw_hints {
+ /* value in ms */
+ u16 mmio_read_timeout;
+
+ /* value in ms */
+ u16 driver_watchdog_timeout;
+
+ /* Per packet tx completion timeout. value in ms */
+ u16 missing_tx_completion_timeout;
+
+ u16 missed_tx_completion_count_threshold_to_reset;
+
+ /* value in ms */
+ u16 admin_completion_tx_timeout;
+
+ u16 netdev_wd_timeout;
+
+ u16 max_tx_sgl_size;
+
+ u16 max_rx_sgl_size;
+
+ u16 reserved[8];
+};
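Because a zero hint means "use the driver's own default", consumers typically fall back per field. A hedged sketch (ENA_DEFAULT_MMIO_TIMEOUT_MS is a hypothetical driver constant, not defined by this header):

	/* Illustrative: honor the device hint, else fall back to a driver default */
	u16 timeout_ms = hints->mmio_read_timeout ?
			 hints->mmio_read_timeout : ENA_DEFAULT_MMIO_TIMEOUT_MS;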
+
+struct ena_admin_get_feat_cmd {
+ struct ena_admin_aq_common_desc aq_common_descriptor;
+
+ struct ena_admin_ctrl_buff_info control_buffer;
+
+ struct ena_admin_get_set_feature_common_desc feat_common;
+
+ u32 raw[11];
+};
+
+struct ena_admin_queue_ext_feature_desc {
+ /* version */
+ u8 version;
+
+ u8 reserved1[3];
+
+ union {
+ struct ena_admin_queue_ext_feature_fields max_queue_ext;
+
+ u32 raw[10];
+ };
+};
+
+struct ena_admin_get_feat_resp {
+ struct ena_admin_acq_common_desc acq_common_desc;
+
+ union {
+ u32 raw[14];
+
+ struct ena_admin_device_attr_feature_desc dev_attr;
+
+ struct ena_admin_feature_llq_desc llq;
+
+ struct ena_admin_queue_feature_desc max_queue;
+
+ struct ena_admin_queue_ext_feature_desc max_queue_ext;
+
+ struct ena_admin_feature_aenq_desc aenq;
+
+ struct ena_admin_get_feature_link_desc link;
+
+ struct ena_admin_feature_offload_desc offload;
+
+ struct ena_admin_feature_rss_flow_hash_function flow_hash_func;
+
+ struct ena_admin_feature_rss_flow_hash_input flow_hash_input;
+
+ struct ena_admin_feature_rss_ind_table ind_table;
+
+ struct ena_admin_feature_intr_moder_desc intr_moderation;
+
+ struct ena_admin_ena_hw_hints hw_hints;
+ } u;
+};
+
+struct ena_admin_set_feat_cmd {
+ struct ena_admin_aq_common_desc aq_common_descriptor;
+
+ struct ena_admin_ctrl_buff_info control_buffer;
+
+ struct ena_admin_get_set_feature_common_desc feat_common;
+
+ union {
+ u32 raw[11];
+
+ /* mtu size */
+ struct ena_admin_set_feature_mtu_desc mtu;
+
+ /* host attributes */
+ struct ena_admin_set_feature_host_attr_desc host_attr;
+
+ /* AENQ configuration */
+ struct ena_admin_feature_aenq_desc aenq;
+
+ /* rss flow hash function */
+ struct ena_admin_feature_rss_flow_hash_function flow_hash_func;
+
+ /* rss flow hash input */
+ struct ena_admin_feature_rss_flow_hash_input flow_hash_input;
+
+ /* rss indirection table */
+ struct ena_admin_feature_rss_ind_table ind_table;
+
+ /* LLQ configuration */
+ struct ena_admin_feature_llq_desc llq;
+ } u;
+};
+
+struct ena_admin_set_feat_resp {
+ struct ena_admin_acq_common_desc acq_common_desc;
+
+ union {
+ u32 raw[14];
+ } u;
+};
+
+struct ena_admin_aenq_common_desc {
+ u16 group;
+
+ u16 syndrome;
+
+ /* 0 : phase
+ * 7:1 : reserved - MBZ
+ */
+ u8 flags;
+
+ u8 reserved1[3];
+
+ u32 timestamp_low;
+
+ u32 timestamp_high;
+};
+
+/* asynchronous event notification groups */
+enum ena_admin_aenq_group {
+ ENA_ADMIN_LINK_CHANGE = 0,
+ ENA_ADMIN_FATAL_ERROR = 1,
+ ENA_ADMIN_WARNING = 2,
+ ENA_ADMIN_NOTIFICATION = 3,
+ ENA_ADMIN_KEEP_ALIVE = 4,
+ ENA_ADMIN_AENQ_GROUPS_NUM = 5,
+};
+
+enum ena_admin_aenq_notification_syndrome {
+ ENA_ADMIN_UPDATE_HINTS = 2,
+};
+
+struct ena_admin_aenq_entry {
+ struct ena_admin_aenq_common_desc aenq_common_desc;
+
+ /* command specific inline data */
+ u32 inline_data_w4[12];
+};
+
+struct ena_admin_aenq_link_change_desc {
+ struct ena_admin_aenq_common_desc aenq_common_desc;
+
+ /* 0 : link_status */
+ u32 flags;
+};
+
+struct ena_admin_aenq_keep_alive_desc {
+ struct ena_admin_aenq_common_desc aenq_common_desc;
+
+ u32 rx_drops_low;
+
+ u32 rx_drops_high;
+
+ u32 tx_drops_low;
+
+ u32 tx_drops_high;
+};
+
+struct ena_admin_ena_mmio_req_read_less_resp {
+ u16 req_id;
+
+ u16 reg_off;
+
+ /* value is valid when poll is cleared */
+ u32 reg_val;
+};
+
+/* aq_common_desc */
+#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2)
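These masks pair with the bit-field comments on struct ena_admin_aq_common_desc above; __ena_com_submit_admin_cmd() in ena_com.c below applies the phase and command-id masks the same way. An illustrative composition of the flags byte for a phased command with a valid control buffer:

	u8 flags = 0;

	flags |= phase & ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;	/* bit 0 */
	flags |= (1U << ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT) &
		 ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK;	/* ctrl buffer valid */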
+
+/* sq */
+#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5
+#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5)
+
+/* acq_common_desc */
+#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* aq_create_sq_cmd */
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0)
+
+/* aq_create_cq_cmd */
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+
+/* get_set_feature_common_desc */
+#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
+
+/* get_feature_link_desc */
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0)
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1)
+
+/* feature_offload_desc */
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_SHIFT 1
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_SHIFT 2
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_SHIFT 3
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK BIT(3)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3)
+
+/* feature_rss_flow_hash_function */
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_SELECTED_FUNC_MASK GENMASK(7, 0)
+
+/* feature_rss_flow_hash_input */
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2)
+
+/* host_info */
+#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0)
+#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8
+#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8)
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16)
+#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT 24
+#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_MASK GENMASK(31, 24)
+#define ENA_ADMIN_HOST_INFO_FUNCTION_MASK GENMASK(2, 0)
+#define ENA_ADMIN_HOST_INFO_DEVICE_SHIFT 3
+#define ENA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3)
+#define ENA_ADMIN_HOST_INFO_BUS_SHIFT 8
+#define ENA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8)
+#define ENA_ADMIN_HOST_INFO_RX_OFFSET_SHIFT 1
+#define ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK BIT(1)
+#define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT 2
+#define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK BIT(2)
+#define ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_SHIFT 3
+#define ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK BIT(3)
+#define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_SHIFT 4
+#define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK BIT(4)
+#define ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_SHIFT 6
+#define ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK BIT(6)
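A sketch of packing driver_version according to the major/minor/sub_minor layout documented on struct ena_admin_host_info (the major/minor/sub_minor variables are illustrative):

	u32 driver_version =
		(major & ENA_ADMIN_HOST_INFO_MAJOR_MASK) |
		((minor << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) &
		 ENA_ADMIN_HOST_INFO_MINOR_MASK) |
		((sub_minor << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) &
		 ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK);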
+
+/* aenq_common_desc */
+#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* aenq_link_change_desc */
+#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0)
+
+#endif /* _ENA_ADMIN_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
new file mode 100644
index 0000000000..633b321d7f
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -0,0 +1,3001 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "ena_com.h"
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* Timeout in micro-sec */
+#define ADMIN_CMD_TIMEOUT_US (3000000)
+
+#define ENA_ASYNC_QUEUE_DEPTH 16
+#define ENA_ADMIN_QUEUE_DEPTH 32
+
+
+#define ENA_CTRL_MAJOR 0
+#define ENA_CTRL_MINOR 0
+#define ENA_CTRL_SUB_MINOR 1
+
+#define MIN_ENA_CTRL_VER \
+ (((ENA_CTRL_MAJOR) << \
+ (ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
+ ((ENA_CTRL_MINOR) << \
+ (ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
+ (ENA_CTRL_SUB_MINOR))
+
+#define ENA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x)))
+#define ENA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32))
+
+#define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF
+
+#define ENA_COM_BOUNCE_BUFFER_CNTRL_CNT 4
+
+#define ENA_REGS_ADMIN_INTR_MASK 1
+
+#define ENA_MAX_BACKOFF_DELAY_EXP 16U
+
+#define ENA_MIN_ADMIN_POLL_US 100
+
+#define ENA_MAX_ADMIN_POLL_US 5000
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************/
+
+enum ena_cmd_status {
+ ENA_CMD_SUBMITTED,
+ ENA_CMD_COMPLETED,
+ /* Abort - canceled by the driver */
+ ENA_CMD_ABORTED,
+};
+
+struct ena_comp_ctx {
+ struct completion wait_event;
+ struct ena_admin_acq_entry *user_cqe;
+ u32 comp_size;
+ enum ena_cmd_status status;
+ /* status from the device */
+ u8 comp_status;
+ u8 cmd_opcode;
+ bool occupied;
+};
+
+struct ena_com_stats_ctx {
+ struct ena_admin_aq_get_stats_cmd get_cmd;
+ struct ena_admin_acq_get_stats_resp get_resp;
+};
+
+static int ena_com_mem_addr_set(struct ena_com_dev *ena_dev,
+ struct ena_common_mem_addr *ena_addr,
+ dma_addr_t addr)
+{
+ if ((addr & GENMASK_ULL(ena_dev->dma_addr_bits - 1, 0)) != addr) {
+ netdev_err(ena_dev->net_device,
+ "DMA address has more bits that the device supports\n");
+ return -EINVAL;
+ }
+
+ ena_addr->mem_addr_low = lower_32_bits(addr);
+ ena_addr->mem_addr_high = (u16)upper_32_bits(addr);
+
+ return 0;
+}
+
+static int ena_com_admin_init_sq(struct ena_com_admin_queue *admin_queue)
+{
+ struct ena_com_dev *ena_dev = admin_queue->ena_dev;
+ struct ena_com_admin_sq *sq = &admin_queue->sq;
+ u16 size = ADMIN_SQ_SIZE(admin_queue->q_depth);
+
+ sq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size,
+ &sq->dma_addr, GFP_KERNEL);
+
+ if (!sq->entries) {
+ netdev_err(ena_dev->net_device, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ sq->head = 0;
+ sq->tail = 0;
+ sq->phase = 1;
+
+ sq->db_addr = NULL;
+
+ return 0;
+}
+
+static int ena_com_admin_init_cq(struct ena_com_admin_queue *admin_queue)
+{
+ struct ena_com_dev *ena_dev = admin_queue->ena_dev;
+ struct ena_com_admin_cq *cq = &admin_queue->cq;
+ u16 size = ADMIN_CQ_SIZE(admin_queue->q_depth);
+
+ cq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size,
+ &cq->dma_addr, GFP_KERNEL);
+
+ if (!cq->entries) {
+ netdev_err(ena_dev->net_device, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ cq->head = 0;
+ cq->phase = 1;
+
+ return 0;
+}
+
+static int ena_com_admin_init_aenq(struct ena_com_dev *ena_dev,
+ struct ena_aenq_handlers *aenq_handlers)
+{
+ struct ena_com_aenq *aenq = &ena_dev->aenq;
+ u32 addr_low, addr_high, aenq_caps;
+ u16 size;
+
+ ena_dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH;
+ size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH);
+ aenq->entries = dma_alloc_coherent(ena_dev->dmadev, size,
+ &aenq->dma_addr, GFP_KERNEL);
+
+ if (!aenq->entries) {
+ netdev_err(ena_dev->net_device, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ aenq->head = aenq->q_depth;
+ aenq->phase = 1;
+
+ addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
+ addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
+
+ writel(addr_low, ena_dev->reg_bar + ENA_REGS_AENQ_BASE_LO_OFF);
+ writel(addr_high, ena_dev->reg_bar + ENA_REGS_AENQ_BASE_HI_OFF);
+
+ aenq_caps = 0;
+ aenq_caps |= ena_dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
+ aenq_caps |= (sizeof(struct ena_admin_aenq_entry)
+ << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
+ ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
+ writel(aenq_caps, ena_dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF);
+
+ if (unlikely(!aenq_handlers)) {
+ netdev_err(ena_dev->net_device,
+ "AENQ handlers pointer is NULL\n");
+ return -EINVAL;
+ }
+
+ aenq->aenq_handlers = aenq_handlers;
+
+ return 0;
+}
+
+static void comp_ctxt_release(struct ena_com_admin_queue *queue,
+ struct ena_comp_ctx *comp_ctx)
+{
+ comp_ctx->occupied = false;
+ atomic_dec(&queue->outstanding_cmds);
+}
+
+static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *admin_queue,
+ u16 command_id, bool capture)
+{
+ if (unlikely(command_id >= admin_queue->q_depth)) {
+ netdev_err(admin_queue->ena_dev->net_device,
+ "Command id is larger than the queue size. cmd_id: %u queue size %d\n",
+ command_id, admin_queue->q_depth);
+ return NULL;
+ }
+
+ if (unlikely(!admin_queue->comp_ctx)) {
+ netdev_err(admin_queue->ena_dev->net_device,
+ "Completion context is NULL\n");
+ return NULL;
+ }
+
+ if (unlikely(admin_queue->comp_ctx[command_id].occupied && capture)) {
+ netdev_err(admin_queue->ena_dev->net_device,
+ "Completion context is occupied\n");
+ return NULL;
+ }
+
+ if (capture) {
+ atomic_inc(&admin_queue->outstanding_cmds);
+ admin_queue->comp_ctx[command_id].occupied = true;
+ }
+
+ return &admin_queue->comp_ctx[command_id];
+}
+
+static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue,
+ struct ena_admin_aq_entry *cmd,
+ size_t cmd_size_in_bytes,
+ struct ena_admin_acq_entry *comp,
+ size_t comp_size_in_bytes)
+{
+ struct ena_comp_ctx *comp_ctx;
+ u16 tail_masked, cmd_id;
+ u16 queue_size_mask;
+ u16 cnt;
+
+ queue_size_mask = admin_queue->q_depth - 1;
+
+ tail_masked = admin_queue->sq.tail & queue_size_mask;
+
+ /* In case of queue FULL */
+ cnt = (u16)atomic_read(&admin_queue->outstanding_cmds);
+ if (cnt >= admin_queue->q_depth) {
+ netdev_dbg(admin_queue->ena_dev->net_device,
+ "Admin queue is full.\n");
+ admin_queue->stats.out_of_space++;
+ return ERR_PTR(-ENOSPC);
+ }
+
+ cmd_id = admin_queue->curr_cmd_id;
+
+ cmd->aq_common_descriptor.flags |= admin_queue->sq.phase &
+ ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
+
+ cmd->aq_common_descriptor.command_id |= cmd_id &
+ ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
+
+ comp_ctx = get_comp_ctxt(admin_queue, cmd_id, true);
+ if (unlikely(!comp_ctx))
+ return ERR_PTR(-EINVAL);
+
+ comp_ctx->status = ENA_CMD_SUBMITTED;
+ comp_ctx->comp_size = (u32)comp_size_in_bytes;
+ comp_ctx->user_cqe = comp;
+ comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
+
+ reinit_completion(&comp_ctx->wait_event);
+
+ memcpy(&admin_queue->sq.entries[tail_masked], cmd, cmd_size_in_bytes);
+
+ admin_queue->curr_cmd_id = (admin_queue->curr_cmd_id + 1) &
+ queue_size_mask;
+
+ admin_queue->sq.tail++;
+ admin_queue->stats.submitted_cmd++;
+
+ if (unlikely((admin_queue->sq.tail & queue_size_mask) == 0))
+ admin_queue->sq.phase = !admin_queue->sq.phase;
+
+ writel(admin_queue->sq.tail, admin_queue->sq.db_addr);
+
+ return comp_ctx;
+}
+
+static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *admin_queue)
+{
+ struct ena_com_dev *ena_dev = admin_queue->ena_dev;
+ size_t size = admin_queue->q_depth * sizeof(struct ena_comp_ctx);
+ struct ena_comp_ctx *comp_ctx;
+ u16 i;
+
+ admin_queue->comp_ctx =
+ devm_kzalloc(admin_queue->q_dmadev, size, GFP_KERNEL);
+ if (unlikely(!admin_queue->comp_ctx)) {
+ netdev_err(ena_dev->net_device, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < admin_queue->q_depth; i++) {
+ comp_ctx = get_comp_ctxt(admin_queue, i, false);
+ if (comp_ctx)
+ init_completion(&comp_ctx->wait_event);
+ }
+
+ return 0;
+}
+
+static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue,
+ struct ena_admin_aq_entry *cmd,
+ size_t cmd_size_in_bytes,
+ struct ena_admin_acq_entry *comp,
+ size_t comp_size_in_bytes)
+{
+ unsigned long flags = 0;
+ struct ena_comp_ctx *comp_ctx;
+
+ spin_lock_irqsave(&admin_queue->q_lock, flags);
+ if (unlikely(!admin_queue->running_state)) {
+ spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+ return ERR_PTR(-ENODEV);
+ }
+ comp_ctx = __ena_com_submit_admin_cmd(admin_queue, cmd,
+ cmd_size_in_bytes,
+ comp,
+ comp_size_in_bytes);
+ if (IS_ERR(comp_ctx))
+ admin_queue->running_state = false;
+ spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+ return comp_ctx;
+}
+
+static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
+ struct ena_com_create_io_ctx *ctx,
+ struct ena_com_io_sq *io_sq)
+{
+ size_t size;
+ int dev_node = 0;
+
+ memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr));
+
+ io_sq->dma_addr_bits = (u8)ena_dev->dma_addr_bits;
+ io_sq->desc_entry_size =
+ (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
+ sizeof(struct ena_eth_io_tx_desc) :
+ sizeof(struct ena_eth_io_rx_desc);
+
+ size = io_sq->desc_entry_size * io_sq->q_depth;
+
+ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
+ dev_node = dev_to_node(ena_dev->dmadev);
+ set_dev_node(ena_dev->dmadev, ctx->numa_node);
+ io_sq->desc_addr.virt_addr =
+ dma_alloc_coherent(ena_dev->dmadev, size,
+ &io_sq->desc_addr.phys_addr,
+ GFP_KERNEL);
+ set_dev_node(ena_dev->dmadev, dev_node);
+ if (!io_sq->desc_addr.virt_addr) {
+ io_sq->desc_addr.virt_addr =
+ dma_alloc_coherent(ena_dev->dmadev, size,
+ &io_sq->desc_addr.phys_addr,
+ GFP_KERNEL);
+ }
+
+ if (!io_sq->desc_addr.virt_addr) {
+ netdev_err(ena_dev->net_device,
+ "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+ }
+
+ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+ /* Allocate bounce buffers */
+ io_sq->bounce_buf_ctrl.buffer_size =
+ ena_dev->llq_info.desc_list_entry_size;
+ io_sq->bounce_buf_ctrl.buffers_num =
+ ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
+ io_sq->bounce_buf_ctrl.next_to_use = 0;
+
+ size = io_sq->bounce_buf_ctrl.buffer_size *
+ io_sq->bounce_buf_ctrl.buffers_num;
+
+ dev_node = dev_to_node(ena_dev->dmadev);
+ set_dev_node(ena_dev->dmadev, ctx->numa_node);
+ io_sq->bounce_buf_ctrl.base_buffer =
+ devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+ set_dev_node(ena_dev->dmadev, dev_node);
+ if (!io_sq->bounce_buf_ctrl.base_buffer)
+ io_sq->bounce_buf_ctrl.base_buffer =
+ devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+
+ if (!io_sq->bounce_buf_ctrl.base_buffer) {
+ netdev_err(ena_dev->net_device,
+ "Bounce buffer memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ memcpy(&io_sq->llq_info, &ena_dev->llq_info,
+ sizeof(io_sq->llq_info));
+
+ /* Initiate the first bounce buffer */
+ io_sq->llq_buf_ctrl.curr_bounce_buf =
+ ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
+ memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+ 0x0, io_sq->llq_info.desc_list_entry_size);
+ io_sq->llq_buf_ctrl.descs_left_in_line =
+ io_sq->llq_info.descs_num_before_header;
+ io_sq->disable_meta_caching =
+ io_sq->llq_info.disable_meta_caching;
+
+ if (io_sq->llq_info.max_entries_in_tx_burst > 0)
+ io_sq->entries_in_tx_burst_left =
+ io_sq->llq_info.max_entries_in_tx_burst;
+ }
+
+ io_sq->tail = 0;
+ io_sq->next_to_comp = 0;
+ io_sq->phase = 1;
+
+ return 0;
+}
+
+static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
+ struct ena_com_create_io_ctx *ctx,
+ struct ena_com_io_cq *io_cq)
+{
+ size_t size;
+ int prev_node = 0;
+
+ memset(&io_cq->cdesc_addr, 0x0, sizeof(io_cq->cdesc_addr));
+
+ /* Use the basic completion descriptor for Rx */
+ io_cq->cdesc_entry_size_in_bytes =
+ (io_cq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
+ sizeof(struct ena_eth_io_tx_cdesc) :
+ sizeof(struct ena_eth_io_rx_cdesc_base);
+
+ size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
+
+ prev_node = dev_to_node(ena_dev->dmadev);
+ set_dev_node(ena_dev->dmadev, ctx->numa_node);
+ io_cq->cdesc_addr.virt_addr =
+ dma_alloc_coherent(ena_dev->dmadev, size,
+ &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
+ set_dev_node(ena_dev->dmadev, prev_node);
+ if (!io_cq->cdesc_addr.virt_addr) {
+ io_cq->cdesc_addr.virt_addr =
+ dma_alloc_coherent(ena_dev->dmadev, size,
+ &io_cq->cdesc_addr.phys_addr,
+ GFP_KERNEL);
+ }
+
+ if (!io_cq->cdesc_addr.virt_addr) {
+ netdev_err(ena_dev->net_device, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ io_cq->phase = 1;
+ io_cq->head = 0;
+
+ return 0;
+}
+
+static void ena_com_handle_single_admin_completion(struct ena_com_admin_queue *admin_queue,
+ struct ena_admin_acq_entry *cqe)
+{
+ struct ena_comp_ctx *comp_ctx;
+ u16 cmd_id;
+
+ cmd_id = cqe->acq_common_descriptor.command &
+ ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+
+ comp_ctx = get_comp_ctxt(admin_queue, cmd_id, false);
+ if (unlikely(!comp_ctx)) {
+ netdev_err(admin_queue->ena_dev->net_device,
+ "comp_ctx is NULL. Changing the admin queue running state\n");
+ admin_queue->running_state = false;
+ return;
+ }
+
+ comp_ctx->status = ENA_CMD_COMPLETED;
+ comp_ctx->comp_status = cqe->acq_common_descriptor.status;
+
+ if (comp_ctx->user_cqe)
+ memcpy(comp_ctx->user_cqe, (void *)cqe, comp_ctx->comp_size);
+
+ if (!admin_queue->polling)
+ complete(&comp_ctx->wait_event);
+}
+
+static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_queue)
+{
+ struct ena_admin_acq_entry *cqe = NULL;
+ u16 comp_num = 0;
+ u16 head_masked;
+ u8 phase;
+
+ head_masked = admin_queue->cq.head & (admin_queue->q_depth - 1);
+ phase = admin_queue->cq.phase;
+
+ cqe = &admin_queue->cq.entries[head_masked];
+
+ /* Go over all the completions */
+ while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
+ ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
+ /* Do not read the rest of the completion entry until the
+ * phase bit has been validated
+ */
+ dma_rmb();
+ ena_com_handle_single_admin_completion(admin_queue, cqe);
+
+ head_masked++;
+ comp_num++;
+ if (unlikely(head_masked == admin_queue->q_depth)) {
+ head_masked = 0;
+ phase = !phase;
+ }
+
+ cqe = &admin_queue->cq.entries[head_masked];
+ }
+
+ admin_queue->cq.head += comp_num;
+ admin_queue->cq.phase = phase;
+ admin_queue->sq.head += comp_num;
+ admin_queue->stats.completed_cmd += comp_num;
+}
+
+static int ena_com_comp_status_to_errno(struct ena_com_admin_queue *admin_queue,
+ u8 comp_status)
+{
+ if (unlikely(comp_status != 0))
+ netdev_err(admin_queue->ena_dev->net_device,
+ "Admin command failed[%u]\n", comp_status);
+
+ switch (comp_status) {
+ case ENA_ADMIN_SUCCESS:
+ return 0;
+ case ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
+ return -ENOMEM;
+ case ENA_ADMIN_UNSUPPORTED_OPCODE:
+ return -EOPNOTSUPP;
+ case ENA_ADMIN_BAD_OPCODE:
+ case ENA_ADMIN_MALFORMED_REQUEST:
+ case ENA_ADMIN_ILLEGAL_PARAMETER:
+ case ENA_ADMIN_UNKNOWN_ERROR:
+ return -EINVAL;
+ case ENA_ADMIN_RESOURCE_BUSY:
+ return -EAGAIN;
+ }
+
+ return -EINVAL;
+}
+
+static void ena_delay_exponential_backoff_us(u32 exp, u32 delay_us)
+{
+ exp = min_t(u32, exp, ENA_MAX_BACKOFF_DELAY_EXP);
+ delay_us = max_t(u32, ENA_MIN_ADMIN_POLL_US, delay_us);
+ delay_us = min_t(u32, delay_us * (1U << exp), ENA_MAX_ADMIN_POLL_US);
+ usleep_range(delay_us, 2 * delay_us);
+}
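With the bounds above, a call sequence with exp = 0, 1, 2, ... and delay_us = 100 sleeps in ranges of roughly [100, 200], [200, 400], [400, 800], ... microseconds, saturating at [5000, 10000] once the doubled delay reaches ENA_MAX_ADMIN_POLL_US; ENA_MAX_BACKOFF_DELAY_EXP caps the exponent so the shift stays well-defined. ena_com_wait_and_process_admin_cq_polling() below relies on exactly this progression.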
+
+static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
+ struct ena_com_admin_queue *admin_queue)
+{
+ unsigned long flags = 0;
+ unsigned long timeout;
+ int ret;
+ u32 exp = 0;
+
+ timeout = jiffies + usecs_to_jiffies(admin_queue->completion_timeout);
+
+ while (1) {
+ spin_lock_irqsave(&admin_queue->q_lock, flags);
+ ena_com_handle_admin_completion(admin_queue);
+ spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+ if (comp_ctx->status != ENA_CMD_SUBMITTED)
+ break;
+
+ if (time_is_before_jiffies(timeout)) {
+ netdev_err(admin_queue->ena_dev->net_device,
+ "Wait for completion (polling) timeout\n");
+ /* ENA didn't have any completion */
+ spin_lock_irqsave(&admin_queue->q_lock, flags);
+ admin_queue->stats.no_completion++;
+ admin_queue->running_state = false;
+ spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+ ret = -ETIME;
+ goto err;
+ }
+
+ ena_delay_exponential_backoff_us(exp++,
+ admin_queue->ena_dev->ena_min_poll_delay_us);
+ }
+
+ if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) {
+ netdev_err(admin_queue->ena_dev->net_device,
+ "Command was aborted\n");
+ spin_lock_irqsave(&admin_queue->q_lock, flags);
+ admin_queue->stats.aborted_cmd++;
+ spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+ ret = -ENODEV;
+ goto err;
+ }
+
+ WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n",
+ comp_ctx->status);
+
+ ret = ena_com_comp_status_to_errno(admin_queue, comp_ctx->comp_status);
+err:
+ comp_ctxt_release(admin_queue, comp_ctx);
+ return ret;
+}
+
+/*
+ * Set the LLQ configurations of the firmware
+ *
+ * The driver provides only the enabled feature values to the device,
+ * which, in turn, checks whether they are supported.
+ */
+static int ena_com_set_llq(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_admin_queue *admin_queue;
+ struct ena_admin_set_feat_cmd cmd;
+ struct ena_admin_set_feat_resp resp;
+ struct ena_com_llq_info *llq_info = &ena_dev->llq_info;
+ int ret;
+
+ memset(&cmd, 0x0, sizeof(cmd));
+ admin_queue = &ena_dev->admin_queue;
+
+ cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+ cmd.feat_common.feature_id = ENA_ADMIN_LLQ;
+
+ cmd.u.llq.header_location_ctrl_enabled = llq_info->header_location_ctrl;
+ cmd.u.llq.entry_size_ctrl_enabled = llq_info->desc_list_entry_size_ctrl;
+ cmd.u.llq.desc_num_before_header_enabled = llq_info->descs_num_before_header;
+ cmd.u.llq.descriptors_stride_ctrl_enabled = llq_info->desc_stride_ctrl;
+
+ cmd.u.llq.accel_mode.u.set.enabled_flags =
+ BIT(ENA_ADMIN_DISABLE_META_CACHING) |
+ BIT(ENA_ADMIN_LIMIT_TX_BURST);
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct ena_admin_acq_entry *)&resp,
+ sizeof(resp));
+
+ if (unlikely(ret))
+ netdev_err(ena_dev->net_device,
+ "Failed to set LLQ configurations: %d\n", ret);
+
+ return ret;
+}
+
+static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
+ struct ena_admin_feature_llq_desc *llq_features,
+ struct ena_llq_configurations *llq_default_cfg)
+{
+ struct ena_com_llq_info *llq_info = &ena_dev->llq_info;
+ struct ena_admin_accel_mode_get llq_accel_mode_get;
+ u16 supported_feat;
+ int rc;
+
+ memset(llq_info, 0, sizeof(*llq_info));
+
+ supported_feat = llq_features->header_location_ctrl_supported;
+
+ if (likely(supported_feat & llq_default_cfg->llq_header_location)) {
+ llq_info->header_location_ctrl =
+ llq_default_cfg->llq_header_location;
+ } else {
+ netdev_err(ena_dev->net_device,
+ "Invalid header location control, supported: 0x%x\n",
+ supported_feat);
+ return -EINVAL;
+ }
+
+ if (likely(llq_info->header_location_ctrl == ENA_ADMIN_INLINE_HEADER)) {
+ supported_feat = llq_features->descriptors_stride_ctrl_supported;
+ if (likely(supported_feat & llq_default_cfg->llq_stride_ctrl)) {
+ llq_info->desc_stride_ctrl = llq_default_cfg->llq_stride_ctrl;
+ } else {
+ if (supported_feat & ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY) {
+ llq_info->desc_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
+ } else if (supported_feat & ENA_ADMIN_SINGLE_DESC_PER_ENTRY) {
+ llq_info->desc_stride_ctrl = ENA_ADMIN_SINGLE_DESC_PER_ENTRY;
+ } else {
+ netdev_err(ena_dev->net_device,
+ "Invalid desc_stride_ctrl, supported: 0x%x\n",
+ supported_feat);
+ return -EINVAL;
+ }
+
+ netdev_err(ena_dev->net_device,
+ "Default llq stride ctrl is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
+ llq_default_cfg->llq_stride_ctrl,
+ supported_feat, llq_info->desc_stride_ctrl);
+ }
+ } else {
+ llq_info->desc_stride_ctrl = 0;
+ }
+
+ supported_feat = llq_features->entry_size_ctrl_supported;
+ if (likely(supported_feat & llq_default_cfg->llq_ring_entry_size)) {
+ llq_info->desc_list_entry_size_ctrl = llq_default_cfg->llq_ring_entry_size;
+ llq_info->desc_list_entry_size = llq_default_cfg->llq_ring_entry_size_value;
+ } else {
+ if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_128B) {
+ llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
+ llq_info->desc_list_entry_size = 128;
+ } else if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_192B) {
+ llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_192B;
+ llq_info->desc_list_entry_size = 192;
+ } else if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_256B) {
+ llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_256B;
+ llq_info->desc_list_entry_size = 256;
+ } else {
+ netdev_err(ena_dev->net_device,
+ "Invalid entry_size_ctrl, supported: 0x%x\n",
+ supported_feat);
+ return -EINVAL;
+ }
+
+ netdev_err(ena_dev->net_device,
+ "Default llq ring entry size is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
+ llq_default_cfg->llq_ring_entry_size, supported_feat,
+ llq_info->desc_list_entry_size);
+ }
+ if (unlikely(llq_info->desc_list_entry_size & 0x7)) {
+ /* The desc list entry size should be a whole multiple of 8
+ * This requirement comes from __iowrite64_copy()
+ */
+ netdev_err(ena_dev->net_device, "Illegal entry size %d\n",
+ llq_info->desc_list_entry_size);
+ return -EINVAL;
+ }
+
+ if (llq_info->desc_stride_ctrl == ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY)
+ llq_info->descs_per_entry = llq_info->desc_list_entry_size /
+ sizeof(struct ena_eth_io_tx_desc);
+ else
+ llq_info->descs_per_entry = 1;
+
+ supported_feat = llq_features->desc_num_before_header_supported;
+ if (likely(supported_feat & llq_default_cfg->llq_num_decs_before_header)) {
+ llq_info->descs_num_before_header = llq_default_cfg->llq_num_decs_before_header;
+ } else {
+ if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2) {
+ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
+ } else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1) {
+ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1;
+ } else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4) {
+ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4;
+ } else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8) {
+ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8;
+ } else {
+ netdev_err(ena_dev->net_device,
+ "Invalid descs_num_before_header, supported: 0x%x\n",
+ supported_feat);
+ return -EINVAL;
+ }
+
+ netdev_err(ena_dev->net_device,
+ "Default llq num descs before header is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
+ llq_default_cfg->llq_num_decs_before_header,
+ supported_feat, llq_info->descs_num_before_header);
+ }
+ /* Check whether the accelerated queue is supported */
+ llq_accel_mode_get = llq_features->accel_mode.u.get;
+
+ llq_info->disable_meta_caching =
+ !!(llq_accel_mode_get.supported_flags &
+ BIT(ENA_ADMIN_DISABLE_META_CACHING));
+
+ if (llq_accel_mode_get.supported_flags & BIT(ENA_ADMIN_LIMIT_TX_BURST))
+ llq_info->max_entries_in_tx_burst =
+ llq_accel_mode_get.max_tx_burst_size /
+ llq_default_cfg->llq_ring_entry_size_value;
+
+ rc = ena_com_set_llq(ena_dev);
+ if (rc)
+ netdev_err(ena_dev->net_device,
+ "Cannot set LLQ configuration: %d\n", rc);
+
+ return rc;
+}
+
+static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx,
+ struct ena_com_admin_queue *admin_queue)
+{
+ unsigned long flags = 0;
+ int ret;
+
+ wait_for_completion_timeout(&comp_ctx->wait_event,
+ usecs_to_jiffies(
+ admin_queue->completion_timeout));
+
+ /* In case the command wasn't completed, find out the root cause.
+ * There might be 2 kinds of errors:
+ * 1) No completion (timeout reached)
+ * 2) There is a completion but the driver didn't receive the MSI-X interrupt.
+ */
+ if (unlikely(comp_ctx->status == ENA_CMD_SUBMITTED)) {
+ spin_lock_irqsave(&admin_queue->q_lock, flags);
+ ena_com_handle_admin_completion(admin_queue);
+ admin_queue->stats.no_completion++;
+ spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+ if (comp_ctx->status == ENA_CMD_COMPLETED) {
+ netdev_err(admin_queue->ena_dev->net_device,
+ "The ena device sent a completion but the driver didn't receive a MSI-X interrupt (cmd %d), autopolling mode is %s\n",
+ comp_ctx->cmd_opcode,
+ admin_queue->auto_polling ? "ON" : "OFF");
+ /* Check if fallback to polling is enabled */
+ if (admin_queue->auto_polling)
+ admin_queue->polling = true;
+ } else {
+ netdev_err(admin_queue->ena_dev->net_device,
+ "The ena device didn't send a completion for the admin cmd %d status %d\n",
+ comp_ctx->cmd_opcode, comp_ctx->status);
+ }
+		/* Check if we shifted to polling mode.
+		 * This happens when there is a completion without an interrupt
+		 * and autopolling mode is enabled; in that case, continue
+		 * normal execution.
+		 */
+ if (!admin_queue->polling) {
+ admin_queue->running_state = false;
+ ret = -ETIME;
+ goto err;
+ }
+ }
+
+ ret = ena_com_comp_status_to_errno(admin_queue, comp_ctx->comp_status);
+err:
+ comp_ctxt_release(admin_queue, comp_ctx);
+ return ret;
+}
+
+/* This method reads a hardware device register by posting a write
+ * and waiting for the response.
+ * On timeout the function returns ENA_MMIO_READ_TIMEOUT.
+ */
+static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
+{
+ struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+ volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp =
+ mmio_read->read_resp;
+ u32 mmio_read_reg, ret, i;
+ unsigned long flags = 0;
+ u32 timeout = mmio_read->reg_read_to;
+
+ might_sleep();
+
+ if (timeout == 0)
+ timeout = ENA_REG_READ_TIMEOUT;
+
+ /* If readless is disabled, perform regular read */
+ if (!mmio_read->readless_supported)
+ return readl(ena_dev->reg_bar + offset);
+
+ spin_lock_irqsave(&mmio_read->lock, flags);
+ mmio_read->seq_num++;
+
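+	/* Seed the expected req_id with an offset the device will overwrite,
+	 * so a stale response can't be mistaken for the new one.
+	 */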
+ read_resp->req_id = mmio_read->seq_num + 0xDEAD;
+ mmio_read_reg = (offset << ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
+ ENA_REGS_MMIO_REG_READ_REG_OFF_MASK;
+ mmio_read_reg |= mmio_read->seq_num &
+ ENA_REGS_MMIO_REG_READ_REQ_ID_MASK;
+
+ writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+
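+	/* Poll (1 usec per iteration) until the device echoes back the
+	 * sequence number of this request.
+	 */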
+ for (i = 0; i < timeout; i++) {
+ if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
+ break;
+
+ udelay(1);
+ }
+
+ if (unlikely(i == timeout)) {
+ netdev_err(ena_dev->net_device,
+			   "Reading reg failed due to timeout. Expected: req id[%u] offset[%u], actual: req id[%u] offset[%u]\n",
+ mmio_read->seq_num, offset, read_resp->req_id,
+ read_resp->reg_off);
+ ret = ENA_MMIO_READ_TIMEOUT;
+ goto err;
+ }
+
+ if (read_resp->reg_off != offset) {
+ netdev_err(ena_dev->net_device,
+ "Read failure: wrong offset provided\n");
+ ret = ENA_MMIO_READ_TIMEOUT;
+ } else {
+ ret = read_resp->reg_val;
+ }
+err:
+ spin_unlock_irqrestore(&mmio_read->lock, flags);
+
+ return ret;
+}
+
+/* There are two ways to wait for a completion.
+ * Polling mode - poll until the completion is available.
+ * Async mode - wait on a wait queue until the completion is ready
+ * (or the timeout expires).
+ * In async mode the IRQ handler is expected to call
+ * ena_com_handle_admin_completion() to mark the completions.
+ */
+static int ena_com_wait_and_process_admin_cq(struct ena_comp_ctx *comp_ctx,
+ struct ena_com_admin_queue *admin_queue)
+{
+ if (admin_queue->polling)
+ return ena_com_wait_and_process_admin_cq_polling(comp_ctx,
+ admin_queue);
+
+ return ena_com_wait_and_process_admin_cq_interrupts(comp_ctx,
+ admin_queue);
+}
+
+static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev,
+ struct ena_com_io_sq *io_sq)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ struct ena_admin_aq_destroy_sq_cmd destroy_cmd;
+ struct ena_admin_acq_destroy_sq_resp_desc destroy_resp;
+ u8 direction;
+ int ret;
+
+ memset(&destroy_cmd, 0x0, sizeof(destroy_cmd));
+
+ if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+ direction = ENA_ADMIN_SQ_DIRECTION_TX;
+ else
+ direction = ENA_ADMIN_SQ_DIRECTION_RX;
+
+ destroy_cmd.sq.sq_identity |= (direction <<
+ ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT) &
+ ENA_ADMIN_SQ_SQ_DIRECTION_MASK;
+
+ destroy_cmd.sq.sq_idx = io_sq->idx;
+ destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_SQ;
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&destroy_cmd,
+ sizeof(destroy_cmd),
+ (struct ena_admin_acq_entry *)&destroy_resp,
+ sizeof(destroy_resp));
+
+ if (unlikely(ret && (ret != -ENODEV)))
+ netdev_err(ena_dev->net_device,
+ "Failed to destroy io sq error: %d\n", ret);
+
+ return ret;
+}
+
+static void ena_com_io_queue_free(struct ena_com_dev *ena_dev,
+ struct ena_com_io_sq *io_sq,
+ struct ena_com_io_cq *io_cq)
+{
+ size_t size;
+
+ if (io_cq->cdesc_addr.virt_addr) {
+ size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
+
+ dma_free_coherent(ena_dev->dmadev, size,
+ io_cq->cdesc_addr.virt_addr,
+ io_cq->cdesc_addr.phys_addr);
+
+ io_cq->cdesc_addr.virt_addr = NULL;
+ }
+
+ if (io_sq->desc_addr.virt_addr) {
+ size = io_sq->desc_entry_size * io_sq->q_depth;
+
+ dma_free_coherent(ena_dev->dmadev, size,
+ io_sq->desc_addr.virt_addr,
+ io_sq->desc_addr.phys_addr);
+
+ io_sq->desc_addr.virt_addr = NULL;
+ }
+
+ if (io_sq->bounce_buf_ctrl.base_buffer) {
+ devm_kfree(ena_dev->dmadev, io_sq->bounce_buf_ctrl.base_buffer);
+ io_sq->bounce_buf_ctrl.base_buffer = NULL;
+ }
+}
+
+static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
+ u16 exp_state)
+{
+ u32 val, exp = 0;
+ unsigned long timeout_stamp;
+
+ /* Convert timeout from resolution of 100ms to us resolution. */
+ timeout_stamp = jiffies + usecs_to_jiffies(100 * 1000 * timeout);
+
+ while (1) {
+ val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+
+ if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) {
+ netdev_err(ena_dev->net_device,
+ "Reg read timeout occurred\n");
+ return -ETIME;
+ }
+
+ if ((val & ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
+ exp_state)
+ return 0;
+
+ if (time_is_before_jiffies(timeout_stamp))
+ return -ETIME;
+
+ ena_delay_exponential_backoff_us(exp++, ena_dev->ena_min_poll_delay_us);
+ }
+}
+
+static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev,
+ enum ena_admin_aq_feature_id feature_id)
+{
+ u32 feature_mask = 1 << feature_id;
+
+	/* Device attributes are always supported */
+ if ((feature_id != ENA_ADMIN_DEVICE_ATTRIBUTES) &&
+ !(ena_dev->supported_features & feature_mask))
+ return false;
+
+ return true;
+}
+
+static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev,
+ struct ena_admin_get_feat_resp *get_resp,
+ enum ena_admin_aq_feature_id feature_id,
+ dma_addr_t control_buf_dma_addr,
+ u32 control_buff_size,
+ u8 feature_ver)
+{
+ struct ena_com_admin_queue *admin_queue;
+ struct ena_admin_get_feat_cmd get_cmd;
+ int ret;
+
+ if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) {
+ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
+ feature_id);
+ return -EOPNOTSUPP;
+ }
+
+ memset(&get_cmd, 0x0, sizeof(get_cmd));
+ admin_queue = &ena_dev->admin_queue;
+
+ get_cmd.aq_common_descriptor.opcode = ENA_ADMIN_GET_FEATURE;
+
+ if (control_buff_size)
+ get_cmd.aq_common_descriptor.flags =
+ ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+ else
+ get_cmd.aq_common_descriptor.flags = 0;
+
+ ret = ena_com_mem_addr_set(ena_dev,
+ &get_cmd.control_buffer.address,
+ control_buf_dma_addr);
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device, "Memory address set failed\n");
+ return ret;
+ }
+
+ get_cmd.control_buffer.length = control_buff_size;
+ get_cmd.feat_common.feature_version = feature_ver;
+ get_cmd.feat_common.feature_id = feature_id;
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)
+ &get_cmd,
+ sizeof(get_cmd),
+ (struct ena_admin_acq_entry *)
+ get_resp,
+ sizeof(*get_resp));
+
+ if (unlikely(ret))
+ netdev_err(ena_dev->net_device,
+ "Failed to submit get_feature command %d error: %d\n",
+ feature_id, ret);
+
+ return ret;
+}
+
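+/* Convenience wrapper around ena_com_get_feature_ex() for features that
+ * don't need a control buffer.
+ */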
+static int ena_com_get_feature(struct ena_com_dev *ena_dev,
+ struct ena_admin_get_feat_resp *get_resp,
+ enum ena_admin_aq_feature_id feature_id,
+ u8 feature_ver)
+{
+ return ena_com_get_feature_ex(ena_dev,
+ get_resp,
+ feature_id,
+ 0,
+ 0,
+ feature_ver);
+}
+
+int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev)
+{
+ return ena_dev->rss.hash_func;
+}
+
+static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev)
+{
+ struct ena_admin_feature_rss_flow_hash_control *hash_key =
+ (ena_dev->rss).hash_key;
+
+ netdev_rss_key_fill(&hash_key->key, sizeof(hash_key->key));
+ /* The key buffer is stored in the device in an array of
+ * uint32 elements.
+ */
+ hash_key->key_parts = ENA_ADMIN_RSS_KEY_PARTS;
+}
+
+static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+
+ if (!ena_com_check_supported_feature_id(ena_dev,
+ ENA_ADMIN_RSS_HASH_FUNCTION))
+ return -EOPNOTSUPP;
+
+ rss->hash_key =
+ dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
+ &rss->hash_key_dma_addr, GFP_KERNEL);
+
+ if (unlikely(!rss->hash_key))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+
+ if (rss->hash_key)
+ dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
+ rss->hash_key, rss->hash_key_dma_addr);
+ rss->hash_key = NULL;
+}
+
+static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+
+ rss->hash_ctrl =
+ dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
+ &rss->hash_ctrl_dma_addr, GFP_KERNEL);
+
+ if (unlikely(!rss->hash_ctrl))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+
+ if (rss->hash_ctrl)
+ dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
+ rss->hash_ctrl, rss->hash_ctrl_dma_addr);
+ rss->hash_ctrl = NULL;
+}
+
+static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev,
+ u16 log_size)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_get_feat_resp get_resp;
+ size_t tbl_size;
+ int ret;
+
+ ret = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG, 0);
+ if (unlikely(ret))
+ return ret;
+
+ if ((get_resp.u.ind_table.min_size > log_size) ||
+ (get_resp.u.ind_table.max_size < log_size)) {
+ netdev_err(ena_dev->net_device,
+			   "Indirect table size doesn't fit. Requested size: %d, min: %d, max: %d\n",
+ 1 << log_size, 1 << get_resp.u.ind_table.min_size,
+ 1 << get_resp.u.ind_table.max_size);
+ return -EINVAL;
+ }
+
+ tbl_size = (1ULL << log_size) *
+ sizeof(struct ena_admin_rss_ind_table_entry);
+
+ rss->rss_ind_tbl =
+ dma_alloc_coherent(ena_dev->dmadev, tbl_size,
+ &rss->rss_ind_tbl_dma_addr, GFP_KERNEL);
+ if (unlikely(!rss->rss_ind_tbl))
+ goto mem_err1;
+
+ tbl_size = (1ULL << log_size) * sizeof(u16);
+ rss->host_rss_ind_tbl =
+ devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL);
+ if (unlikely(!rss->host_rss_ind_tbl))
+ goto mem_err2;
+
+ rss->tbl_log_size = log_size;
+
+ return 0;
+
+mem_err2:
+ tbl_size = (1ULL << log_size) *
+ sizeof(struct ena_admin_rss_ind_table_entry);
+
+ dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl,
+ rss->rss_ind_tbl_dma_addr);
+ rss->rss_ind_tbl = NULL;
+mem_err1:
+ rss->tbl_log_size = 0;
+ return -ENOMEM;
+}
+
+static void ena_com_indirect_table_destroy(struct ena_com_dev *ena_dev)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+ size_t tbl_size = (1ULL << rss->tbl_log_size) *
+ sizeof(struct ena_admin_rss_ind_table_entry);
+
+ if (rss->rss_ind_tbl)
+ dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl,
+ rss->rss_ind_tbl_dma_addr);
+ rss->rss_ind_tbl = NULL;
+
+ if (rss->host_rss_ind_tbl)
+ devm_kfree(ena_dev->dmadev, rss->host_rss_ind_tbl);
+ rss->host_rss_ind_tbl = NULL;
+}
+
+static int ena_com_create_io_sq(struct ena_com_dev *ena_dev,
+ struct ena_com_io_sq *io_sq, u16 cq_idx)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ struct ena_admin_aq_create_sq_cmd create_cmd;
+ struct ena_admin_acq_create_sq_resp_desc cmd_completion;
+ u8 direction;
+ int ret;
+
+ memset(&create_cmd, 0x0, sizeof(create_cmd));
+
+ create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ;
+
+ if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+ direction = ENA_ADMIN_SQ_DIRECTION_TX;
+ else
+ direction = ENA_ADMIN_SQ_DIRECTION_RX;
+
+ create_cmd.sq_identity |= (direction <<
+ ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT) &
+ ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK;
+
+ create_cmd.sq_caps_2 |= io_sq->mem_queue_type &
+ ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK;
+
+ create_cmd.sq_caps_2 |= (ENA_ADMIN_COMPLETION_POLICY_DESC <<
+ ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT) &
+ ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK;
+
+ create_cmd.sq_caps_3 |=
+ ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK;
+
+ create_cmd.cq_idx = cq_idx;
+ create_cmd.sq_depth = io_sq->q_depth;
+
+ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
+ ret = ena_com_mem_addr_set(ena_dev,
+ &create_cmd.sq_ba,
+ io_sq->desc_addr.phys_addr);
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device,
+ "Memory address set failed\n");
+ return ret;
+ }
+ }
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&create_cmd,
+ sizeof(create_cmd),
+ (struct ena_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device,
+ "Failed to create IO SQ. error: %d\n", ret);
+ return ret;
+ }
+
+ io_sq->idx = cmd_completion.sq_idx;
+
+ io_sq->db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+ (uintptr_t)cmd_completion.sq_doorbell_offset);
+
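+	/* For the device placement policy (LLQ), headers and descriptors
+	 * are written directly into the device memory BAR.
+	 */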
+ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+ io_sq->header_addr = (u8 __iomem *)((uintptr_t)ena_dev->mem_bar
+ + cmd_completion.llq_headers_offset);
+
+ io_sq->desc_addr.pbuf_dev_addr =
+ (u8 __iomem *)((uintptr_t)ena_dev->mem_bar +
+ cmd_completion.llq_descriptors_offset);
+ }
+
+ netdev_dbg(ena_dev->net_device, "Created sq[%u], depth[%u]\n",
+ io_sq->idx, io_sq->q_depth);
+
+ return ret;
+}
+
+static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+ struct ena_com_io_sq *io_sq;
+ u16 qid;
+ int i;
+
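+	/* Translate each host-side queue id into the index the device
+	 * assigned to the matching Rx SQ.
+	 */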
+ for (i = 0; i < 1 << rss->tbl_log_size; i++) {
+ qid = rss->host_rss_ind_tbl[i];
+ if (qid >= ENA_TOTAL_NUM_QUEUES)
+ return -EINVAL;
+
+ io_sq = &ena_dev->io_sq_queues[qid];
+
+ if (io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX)
+ return -EINVAL;
+
+ rss->rss_ind_tbl[i].cq_idx = io_sq->idx;
+ }
+
+ return 0;
+}
+
+static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev,
+ u16 intr_delay_resolution)
+{
+ u16 prev_intr_delay_resolution = ena_dev->intr_delay_resolution;
+
+ if (unlikely(!intr_delay_resolution)) {
+ netdev_err(ena_dev->net_device,
+ "Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n");
+ intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
+ }
+
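+	/* Rescale the stored intervals so the time they represent is
+	 * preserved: interval_in_usecs = interval * resolution, e.g. an
+	 * interval of 64 at 1 usec resolution becomes 32 at 2 usec.
+	 */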
+ /* update Rx */
+ ena_dev->intr_moder_rx_interval =
+ ena_dev->intr_moder_rx_interval *
+ prev_intr_delay_resolution /
+ intr_delay_resolution;
+
+ /* update Tx */
+ ena_dev->intr_moder_tx_interval =
+ ena_dev->intr_moder_tx_interval *
+ prev_intr_delay_resolution /
+ intr_delay_resolution;
+
+ ena_dev->intr_delay_resolution = intr_delay_resolution;
+}
+
+/*****************************************************************************/
+/******************************* API ******************************/
+/*****************************************************************************/
+
+int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
+ struct ena_admin_aq_entry *cmd,
+ size_t cmd_size,
+ struct ena_admin_acq_entry *comp,
+ size_t comp_size)
+{
+ struct ena_comp_ctx *comp_ctx;
+ int ret;
+
+ comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size,
+ comp, comp_size);
+ if (IS_ERR(comp_ctx)) {
+ ret = PTR_ERR(comp_ctx);
+ if (ret == -ENODEV)
+ netdev_dbg(admin_queue->ena_dev->net_device,
+ "Failed to submit command [%d]\n", ret);
+ else
+ netdev_err(admin_queue->ena_dev->net_device,
+ "Failed to submit command [%d]\n", ret);
+
+ return ret;
+ }
+
+ ret = ena_com_wait_and_process_admin_cq(comp_ctx, admin_queue);
+ if (unlikely(ret)) {
+ if (admin_queue->running_state)
+ netdev_err(admin_queue->ena_dev->net_device,
+ "Failed to process command. ret = %d\n", ret);
+ else
+ netdev_dbg(admin_queue->ena_dev->net_device,
+ "Failed to process command. ret = %d\n", ret);
+ }
+ return ret;
+}
+
+int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
+ struct ena_com_io_cq *io_cq)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ struct ena_admin_aq_create_cq_cmd create_cmd;
+ struct ena_admin_acq_create_cq_resp_desc cmd_completion;
+ int ret;
+
+ memset(&create_cmd, 0x0, sizeof(create_cmd));
+
+ create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ;
+
+ create_cmd.cq_caps_2 |= (io_cq->cdesc_entry_size_in_bytes / 4) &
+ ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK;
+ create_cmd.cq_caps_1 |=
+ ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK;
+
+ create_cmd.msix_vector = io_cq->msix_vector;
+ create_cmd.cq_depth = io_cq->q_depth;
+
+ ret = ena_com_mem_addr_set(ena_dev,
+ &create_cmd.cq_ba,
+ io_cq->cdesc_addr.phys_addr);
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device, "Memory address set failed\n");
+ return ret;
+ }
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&create_cmd,
+ sizeof(create_cmd),
+ (struct ena_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device,
+ "Failed to create IO CQ. error: %d\n", ret);
+ return ret;
+ }
+
+ io_cq->idx = cmd_completion.cq_idx;
+
+ io_cq->unmask_reg = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+ cmd_completion.cq_interrupt_unmask_register_offset);
+
+ if (cmd_completion.cq_head_db_register_offset)
+ io_cq->cq_head_db_reg =
+ (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+ cmd_completion.cq_head_db_register_offset);
+
+ if (cmd_completion.numa_node_register_offset)
+ io_cq->numa_node_cfg_reg =
+ (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+ cmd_completion.numa_node_register_offset);
+
+ netdev_dbg(ena_dev->net_device, "Created cq[%u], depth[%u]\n",
+ io_cq->idx, io_cq->q_depth);
+
+ return ret;
+}
+
+int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid,
+ struct ena_com_io_sq **io_sq,
+ struct ena_com_io_cq **io_cq)
+{
+ if (qid >= ENA_TOTAL_NUM_QUEUES) {
+ netdev_err(ena_dev->net_device,
+ "Invalid queue number %d but the max is %d\n", qid,
+ ENA_TOTAL_NUM_QUEUES);
+ return -EINVAL;
+ }
+
+ *io_sq = &ena_dev->io_sq_queues[qid];
+ *io_cq = &ena_dev->io_cq_queues[qid];
+
+ return 0;
+}
+
+void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ struct ena_comp_ctx *comp_ctx;
+ u16 i;
+
+ if (!admin_queue->comp_ctx)
+ return;
+
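+	/* Mark every pending command as aborted and wake up its waiter */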
+ for (i = 0; i < admin_queue->q_depth; i++) {
+ comp_ctx = get_comp_ctxt(admin_queue, i, false);
+ if (unlikely(!comp_ctx))
+ break;
+
+ comp_ctx->status = ENA_CMD_ABORTED;
+
+ complete(&comp_ctx->wait_event);
+ }
+}
+
+void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ unsigned long flags = 0;
+ u32 exp = 0;
+
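+	/* Poll with exponential backoff until the outstanding admin
+	 * commands counter drops to zero.
+	 */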
+ spin_lock_irqsave(&admin_queue->q_lock, flags);
+ while (atomic_read(&admin_queue->outstanding_cmds) != 0) {
+ spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+ ena_delay_exponential_backoff_us(exp++,
+ ena_dev->ena_min_poll_delay_us);
+ spin_lock_irqsave(&admin_queue->q_lock, flags);
+ }
+ spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+}
+
+int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
+ struct ena_com_io_cq *io_cq)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ struct ena_admin_aq_destroy_cq_cmd destroy_cmd;
+ struct ena_admin_acq_destroy_cq_resp_desc destroy_resp;
+ int ret;
+
+ memset(&destroy_cmd, 0x0, sizeof(destroy_cmd));
+
+ destroy_cmd.cq_idx = io_cq->idx;
+ destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ;
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&destroy_cmd,
+ sizeof(destroy_cmd),
+ (struct ena_admin_acq_entry *)&destroy_resp,
+ sizeof(destroy_resp));
+
+ if (unlikely(ret && (ret != -ENODEV)))
+ netdev_err(ena_dev->net_device,
+ "Failed to destroy IO CQ. error: %d\n", ret);
+
+ return ret;
+}
+
+bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev)
+{
+ return ena_dev->admin_queue.running_state;
+}
+
+void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&admin_queue->q_lock, flags);
+ ena_dev->admin_queue.running_state = state;
+ spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+}
+
+void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev)
+{
+ u16 depth = ena_dev->aenq.q_depth;
+
+ WARN(ena_dev->aenq.head != depth, "Invalid AENQ state\n");
+
+ /* Init head_db to mark that all entries in the queue
+ * are initially available
+ */
+ writel(depth, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+}
+
+int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag)
+{
+ struct ena_com_admin_queue *admin_queue;
+ struct ena_admin_set_feat_cmd cmd;
+ struct ena_admin_set_feat_resp resp;
+ struct ena_admin_get_feat_resp get_resp;
+ int ret;
+
+ ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG, 0);
+ if (ret) {
+ dev_info(ena_dev->dmadev, "Can't get aenq configuration\n");
+ return ret;
+ }
+
+ if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) {
+ netdev_warn(ena_dev->net_device,
+ "Trying to set unsupported aenq events. supported flag: 0x%x asked flag: 0x%x\n",
+ get_resp.u.aenq.supported_groups, groups_flag);
+ return -EOPNOTSUPP;
+ }
+
+ memset(&cmd, 0x0, sizeof(cmd));
+ admin_queue = &ena_dev->admin_queue;
+
+ cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+ cmd.aq_common_descriptor.flags = 0;
+ cmd.feat_common.feature_id = ENA_ADMIN_AENQ_CONFIG;
+ cmd.u.aenq.enabled_groups = groups_flag;
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct ena_admin_acq_entry *)&resp,
+ sizeof(resp));
+
+ if (unlikely(ret))
+ netdev_err(ena_dev->net_device,
+ "Failed to config AENQ ret: %d\n", ret);
+
+ return ret;
+}
+
+int ena_com_get_dma_width(struct ena_com_dev *ena_dev)
+{
+ u32 caps = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF);
+ u32 width;
+
+ if (unlikely(caps == ENA_MMIO_READ_TIMEOUT)) {
+ netdev_err(ena_dev->net_device, "Reg read timeout occurred\n");
+ return -ETIME;
+ }
+
+ width = (caps & ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
+ ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
+
+ netdev_dbg(ena_dev->net_device, "ENA dma width: %d\n", width);
+
+ if ((width < 32) || width > ENA_MAX_PHYS_ADDR_SIZE_BITS) {
+ netdev_err(ena_dev->net_device, "DMA width illegal value: %d\n",
+ width);
+ return -EINVAL;
+ }
+
+ ena_dev->dma_addr_bits = width;
+
+ return width;
+}
+
+int ena_com_validate_version(struct ena_com_dev *ena_dev)
+{
+ u32 ver;
+ u32 ctrl_ver;
+ u32 ctrl_ver_masked;
+
+	/* Make sure the ENA version and the controller version are at least
+	 * as recent as the driver expects
+	 */
+ ver = ena_com_reg_bar_read32(ena_dev, ENA_REGS_VERSION_OFF);
+ ctrl_ver = ena_com_reg_bar_read32(ena_dev,
+ ENA_REGS_CONTROLLER_VERSION_OFF);
+
+ if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) ||
+ (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) {
+ netdev_err(ena_dev->net_device, "Reg read timeout occurred\n");
+ return -ETIME;
+ }
+
+ dev_info(ena_dev->dmadev, "ENA device version: %d.%d\n",
+ (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >>
+ ENA_REGS_VERSION_MAJOR_VERSION_SHIFT,
+ ver & ENA_REGS_VERSION_MINOR_VERSION_MASK);
+
+ dev_info(ena_dev->dmadev,
+ "ENA controller version: %d.%d.%d implementation version %d\n",
+ (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
+ ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
+ (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
+ ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
+ (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
+ (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
+ ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
+
+ ctrl_ver_masked =
+ (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
+ (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
+ (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
+
+ /* Validate the ctrl version without the implementation ID */
+ if (ctrl_ver_masked < MIN_ENA_CTRL_VER) {
+ netdev_err(ena_dev->net_device,
+ "ENA ctrl version is lower than the minimal ctrl version the driver supports\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+ena_com_free_ena_admin_queue_comp_ctx(struct ena_com_dev *ena_dev,
+				      struct ena_com_admin_queue *admin_queue)
+{
+ if (!admin_queue->comp_ctx)
+ return;
+
+ devm_kfree(ena_dev->dmadev, admin_queue->comp_ctx);
+
+ admin_queue->comp_ctx = NULL;
+}
+
+void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ struct ena_com_admin_cq *cq = &admin_queue->cq;
+ struct ena_com_admin_sq *sq = &admin_queue->sq;
+ struct ena_com_aenq *aenq = &ena_dev->aenq;
+ u16 size;
+
+ ena_com_free_ena_admin_queue_comp_ctx(ena_dev, admin_queue);
+
+ size = ADMIN_SQ_SIZE(admin_queue->q_depth);
+ if (sq->entries)
+ dma_free_coherent(ena_dev->dmadev, size, sq->entries,
+ sq->dma_addr);
+ sq->entries = NULL;
+
+ size = ADMIN_CQ_SIZE(admin_queue->q_depth);
+ if (cq->entries)
+ dma_free_coherent(ena_dev->dmadev, size, cq->entries,
+ cq->dma_addr);
+ cq->entries = NULL;
+
+ size = ADMIN_AENQ_SIZE(aenq->q_depth);
+ if (ena_dev->aenq.entries)
+ dma_free_coherent(ena_dev->dmadev, size, aenq->entries,
+ aenq->dma_addr);
+ aenq->entries = NULL;
+}
+
+void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
+{
+ u32 mask_value = 0;
+
+ if (polling)
+ mask_value = ENA_REGS_ADMIN_INTR_MASK;
+
+ writel(mask_value, ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF);
+ ena_dev->admin_queue.polling = polling;
+}
+
+void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev,
+ bool polling)
+{
+ ena_dev->admin_queue.auto_polling = polling;
+}
+
+int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+ spin_lock_init(&mmio_read->lock);
+ mmio_read->read_resp =
+ dma_alloc_coherent(ena_dev->dmadev,
+ sizeof(*mmio_read->read_resp),
+ &mmio_read->read_resp_dma_addr, GFP_KERNEL);
+ if (unlikely(!mmio_read->read_resp))
+ goto err;
+
+ ena_com_mmio_reg_read_request_write_dev_addr(ena_dev);
+
+ mmio_read->read_resp->req_id = 0x0;
+ mmio_read->seq_num = 0x0;
+ mmio_read->readless_supported = true;
+
+ return 0;
+
+err:
+
+ return -ENOMEM;
+}
+
+void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported)
+{
+ struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+ mmio_read->readless_supported = readless_supported;
+}
+
+void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+ writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF);
+ writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF);
+
+ dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp),
+ mmio_read->read_resp, mmio_read->read_resp_dma_addr);
+
+ mmio_read->read_resp = NULL;
+}
+
+void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+ u32 addr_low, addr_high;
+
+ addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(mmio_read->read_resp_dma_addr);
+ addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(mmio_read->read_resp_dma_addr);
+
+ writel(addr_low, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF);
+ writel(addr_high, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF);
+}
+
+int ena_com_admin_init(struct ena_com_dev *ena_dev,
+ struct ena_aenq_handlers *aenq_handlers)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high;
+ int ret;
+
+ dev_sts = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+
+ if (unlikely(dev_sts == ENA_MMIO_READ_TIMEOUT)) {
+ netdev_err(ena_dev->net_device, "Reg read timeout occurred\n");
+ return -ETIME;
+ }
+
+ if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) {
+ netdev_err(ena_dev->net_device,
+ "Device isn't ready, abort com init\n");
+ return -ENODEV;
+ }
+
+ admin_queue->q_depth = ENA_ADMIN_QUEUE_DEPTH;
+
+ admin_queue->q_dmadev = ena_dev->dmadev;
+ admin_queue->polling = false;
+ admin_queue->curr_cmd_id = 0;
+
+ atomic_set(&admin_queue->outstanding_cmds, 0);
+
+ spin_lock_init(&admin_queue->q_lock);
+
+ ret = ena_com_init_comp_ctxt(admin_queue);
+ if (ret)
+ goto error;
+
+ ret = ena_com_admin_init_sq(admin_queue);
+ if (ret)
+ goto error;
+
+ ret = ena_com_admin_init_cq(admin_queue);
+ if (ret)
+ goto error;
+
+ admin_queue->sq.db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+ ENA_REGS_AQ_DB_OFF);
+
+ addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->sq.dma_addr);
+ addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->sq.dma_addr);
+
+ writel(addr_low, ena_dev->reg_bar + ENA_REGS_AQ_BASE_LO_OFF);
+ writel(addr_high, ena_dev->reg_bar + ENA_REGS_AQ_BASE_HI_OFF);
+
+ addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->cq.dma_addr);
+ addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->cq.dma_addr);
+
+ writel(addr_low, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_LO_OFF);
+ writel(addr_high, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_HI_OFF);
+
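+	/* Encode the queue depth and entry size into the AQ/ACQ caps
+	 * registers.
+	 */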
+ aq_caps = 0;
+ aq_caps |= admin_queue->q_depth & ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
+ aq_caps |= (sizeof(struct ena_admin_aq_entry) <<
+ ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
+ ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
+
+ acq_caps = 0;
+ acq_caps |= admin_queue->q_depth & ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
+ acq_caps |= (sizeof(struct ena_admin_acq_entry) <<
+ ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
+ ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
+
+ writel(aq_caps, ena_dev->reg_bar + ENA_REGS_AQ_CAPS_OFF);
+ writel(acq_caps, ena_dev->reg_bar + ENA_REGS_ACQ_CAPS_OFF);
+ ret = ena_com_admin_init_aenq(ena_dev, aenq_handlers);
+ if (ret)
+ goto error;
+
+ admin_queue->ena_dev = ena_dev;
+ admin_queue->running_state = true;
+
+ return 0;
+error:
+ ena_com_admin_destroy(ena_dev);
+
+ return ret;
+}
+
+int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
+ struct ena_com_create_io_ctx *ctx)
+{
+ struct ena_com_io_sq *io_sq;
+ struct ena_com_io_cq *io_cq;
+ int ret;
+
+ if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) {
+ netdev_err(ena_dev->net_device,
+ "Qid (%d) is bigger than max num of queues (%d)\n",
+ ctx->qid, ENA_TOTAL_NUM_QUEUES);
+ return -EINVAL;
+ }
+
+ io_sq = &ena_dev->io_sq_queues[ctx->qid];
+ io_cq = &ena_dev->io_cq_queues[ctx->qid];
+
+ memset(io_sq, 0x0, sizeof(*io_sq));
+ memset(io_cq, 0x0, sizeof(*io_cq));
+
+ /* Init CQ */
+ io_cq->q_depth = ctx->queue_size;
+ io_cq->direction = ctx->direction;
+ io_cq->qid = ctx->qid;
+
+ io_cq->msix_vector = ctx->msix_vector;
+
+ io_sq->q_depth = ctx->queue_size;
+ io_sq->direction = ctx->direction;
+ io_sq->qid = ctx->qid;
+
+ io_sq->mem_queue_type = ctx->mem_queue_type;
+
+ if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+ /* header length is limited to 8 bits */
+ io_sq->tx_max_header_size =
+ min_t(u32, ena_dev->tx_max_header_size, SZ_256);
+
+ ret = ena_com_init_io_sq(ena_dev, ctx, io_sq);
+ if (ret)
+ goto error;
+ ret = ena_com_init_io_cq(ena_dev, ctx, io_cq);
+ if (ret)
+ goto error;
+
+ ret = ena_com_create_io_cq(ena_dev, io_cq);
+ if (ret)
+ goto error;
+
+ ret = ena_com_create_io_sq(ena_dev, io_sq, io_cq->idx);
+ if (ret)
+ goto destroy_io_cq;
+
+ return 0;
+
+destroy_io_cq:
+ ena_com_destroy_io_cq(ena_dev, io_cq);
+error:
+ ena_com_io_queue_free(ena_dev, io_sq, io_cq);
+ return ret;
+}
+
+void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid)
+{
+ struct ena_com_io_sq *io_sq;
+ struct ena_com_io_cq *io_cq;
+
+ if (qid >= ENA_TOTAL_NUM_QUEUES) {
+ netdev_err(ena_dev->net_device,
+ "Qid (%d) is bigger than max num of queues (%d)\n",
+ qid, ENA_TOTAL_NUM_QUEUES);
+ return;
+ }
+
+ io_sq = &ena_dev->io_sq_queues[qid];
+ io_cq = &ena_dev->io_cq_queues[qid];
+
+ ena_com_destroy_io_sq(ena_dev, io_sq);
+ ena_com_destroy_io_cq(ena_dev, io_cq);
+
+ ena_com_io_queue_free(ena_dev, io_sq, io_cq);
+}
+
+int ena_com_get_link_params(struct ena_com_dev *ena_dev,
+ struct ena_admin_get_feat_resp *resp)
+{
+ return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG, 0);
+}
+
+int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
+ struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+ struct ena_admin_get_feat_resp get_resp;
+ int rc;
+
+ rc = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_DEVICE_ATTRIBUTES, 0);
+ if (rc)
+ return rc;
+
+ memcpy(&get_feat_ctx->dev_attr, &get_resp.u.dev_attr,
+ sizeof(get_resp.u.dev_attr));
+
+ ena_dev->supported_features = get_resp.u.dev_attr.supported_features;
+ ena_dev->capabilities = get_resp.u.dev_attr.capabilities;
+
+ if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+ rc = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_MAX_QUEUES_EXT,
+ ENA_FEATURE_MAX_QUEUE_EXT_VER);
+ if (rc)
+ return rc;
+
+ if (get_resp.u.max_queue_ext.version !=
+ ENA_FEATURE_MAX_QUEUE_EXT_VER)
+ return -EINVAL;
+
+ memcpy(&get_feat_ctx->max_queue_ext, &get_resp.u.max_queue_ext,
+ sizeof(get_resp.u.max_queue_ext));
+ ena_dev->tx_max_header_size =
+ get_resp.u.max_queue_ext.max_queue_ext.max_tx_header_size;
+ } else {
+ rc = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_MAX_QUEUES_NUM, 0);
+ memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue,
+ sizeof(get_resp.u.max_queue));
+ ena_dev->tx_max_header_size =
+ get_resp.u.max_queue.max_header_size;
+
+ if (rc)
+ return rc;
+ }
+
+ rc = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_AENQ_CONFIG, 0);
+ if (rc)
+ return rc;
+
+ memcpy(&get_feat_ctx->aenq, &get_resp.u.aenq,
+ sizeof(get_resp.u.aenq));
+
+ rc = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0);
+ if (rc)
+ return rc;
+
+ memcpy(&get_feat_ctx->offload, &get_resp.u.offload,
+ sizeof(get_resp.u.offload));
+
+	/* Driver hints isn't a mandatory admin command, so in case the
+	 * command isn't supported, set the driver hints to 0.
+	 */
+ rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS, 0);
+
+ if (!rc)
+ memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints,
+ sizeof(get_resp.u.hw_hints));
+ else if (rc == -EOPNOTSUPP)
+ memset(&get_feat_ctx->hw_hints, 0x0,
+ sizeof(get_feat_ctx->hw_hints));
+ else
+ return rc;
+
+ rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ, 0);
+ if (!rc)
+ memcpy(&get_feat_ctx->llq, &get_resp.u.llq,
+ sizeof(get_resp.u.llq));
+ else if (rc == -EOPNOTSUPP)
+ memset(&get_feat_ctx->llq, 0x0, sizeof(get_feat_ctx->llq));
+ else
+ return rc;
+
+ return 0;
+}
+
+void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev)
+{
+ ena_com_handle_admin_completion(&ena_dev->admin_queue);
+}
+
+/* ena_com_get_specific_aenq_cb:
+ * return the handler that is relevant to the specific event group
+ */
+static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *ena_dev,
+ u16 group)
+{
+ struct ena_aenq_handlers *aenq_handlers = ena_dev->aenq.aenq_handlers;
+
+ if ((group < ENA_MAX_HANDLERS) && aenq_handlers->handlers[group])
+ return aenq_handlers->handlers[group];
+
+ return aenq_handlers->unimplemented_handler;
+}
+
+/* ena_com_aenq_intr_handler:
+ * handles the incoming aenq events.
+ * pops events from the queue and applies the specific handler
+ */
+void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data)
+{
+ struct ena_admin_aenq_entry *aenq_e;
+ struct ena_admin_aenq_common_desc *aenq_common;
+ struct ena_com_aenq *aenq = &ena_dev->aenq;
+ u64 timestamp;
+ ena_aenq_handler handler_cb;
+ u16 masked_head, processed = 0;
+ u8 phase;
+
+ masked_head = aenq->head & (aenq->q_depth - 1);
+ phase = aenq->phase;
+ aenq_e = &aenq->entries[masked_head]; /* Get first entry */
+ aenq_common = &aenq_e->aenq_common_desc;
+
+ /* Go over all the events */
+ while ((READ_ONCE(aenq_common->flags) &
+ ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
+ /* Make sure the phase bit (ownership) is as expected before
+ * reading the rest of the descriptor.
+ */
+ dma_rmb();
+
+ timestamp = (u64)aenq_common->timestamp_low |
+ ((u64)aenq_common->timestamp_high << 32);
+
+ netdev_dbg(ena_dev->net_device,
+ "AENQ! Group[%x] Syndrome[%x] timestamp: [%llus]\n",
+ aenq_common->group, aenq_common->syndrome, timestamp);
+
+		/* Handle specific event */
+ handler_cb = ena_com_get_specific_aenq_cb(ena_dev,
+ aenq_common->group);
+		handler_cb(data, aenq_e); /* call the actual event handler */
+
+ /* Get next event entry */
+ masked_head++;
+ processed++;
+
+ if (unlikely(masked_head == aenq->q_depth)) {
+ masked_head = 0;
+ phase = !phase;
+ }
+ aenq_e = &aenq->entries[masked_head];
+ aenq_common = &aenq_e->aenq_common_desc;
+ }
+
+ aenq->head += processed;
+ aenq->phase = phase;
+
+ /* Don't update aenq doorbell if there weren't any processed events */
+ if (!processed)
+ return;
+
+ /* write the aenq doorbell after all AENQ descriptors were read */
+ mb();
+ writel_relaxed((u32)aenq->head,
+ ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+}
+
+int ena_com_dev_reset(struct ena_com_dev *ena_dev,
+ enum ena_regs_reset_reason_types reset_reason)
+{
+ u32 stat, timeout, cap, reset_val;
+ int rc;
+
+ stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+ cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF);
+
+ if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) ||
+ (cap == ENA_MMIO_READ_TIMEOUT))) {
+ netdev_err(ena_dev->net_device, "Reg read32 timeout occurred\n");
+ return -ETIME;
+ }
+
+ if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) {
+ netdev_err(ena_dev->net_device,
+ "Device isn't ready, can't reset device\n");
+ return -EINVAL;
+ }
+
+ timeout = (cap & ENA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
+ ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
+ if (timeout == 0) {
+ netdev_err(ena_dev->net_device, "Invalid timeout value\n");
+ return -EINVAL;
+ }
+
+ /* start reset */
+ reset_val = ENA_REGS_DEV_CTL_DEV_RESET_MASK;
+ reset_val |= (reset_reason << ENA_REGS_DEV_CTL_RESET_REASON_SHIFT) &
+ ENA_REGS_DEV_CTL_RESET_REASON_MASK;
+ writel(reset_val, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF);
+
+ /* Write again the MMIO read request address */
+ ena_com_mmio_reg_read_request_write_dev_addr(ena_dev);
+
+ rc = wait_for_reset_state(ena_dev, timeout,
+ ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
+ if (rc != 0) {
+ netdev_err(ena_dev->net_device,
+ "Reset indication didn't turn on\n");
+ return rc;
+ }
+
+ /* reset done */
+ writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF);
+ rc = wait_for_reset_state(ena_dev, timeout, 0);
+ if (rc != 0) {
+ netdev_err(ena_dev->net_device,
+ "Reset indication didn't turn off\n");
+ return rc;
+ }
+
+ timeout = (cap & ENA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
+ ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
+ if (timeout)
+ /* the resolution of timeout reg is 100ms */
+ ena_dev->admin_queue.completion_timeout = timeout * 100000;
+ else
+ ena_dev->admin_queue.completion_timeout = ADMIN_CMD_TIMEOUT_US;
+
+ return 0;
+}
+
+static int ena_get_dev_stats(struct ena_com_dev *ena_dev,
+ struct ena_com_stats_ctx *ctx,
+ enum ena_admin_get_stats_type type)
+{
+ struct ena_admin_aq_get_stats_cmd *get_cmd = &ctx->get_cmd;
+ struct ena_admin_acq_get_stats_resp *get_resp = &ctx->get_resp;
+ struct ena_com_admin_queue *admin_queue;
+ int ret;
+
+ admin_queue = &ena_dev->admin_queue;
+
+ get_cmd->aq_common_descriptor.opcode = ENA_ADMIN_GET_STATS;
+ get_cmd->aq_common_descriptor.flags = 0;
+ get_cmd->type = type;
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)get_cmd,
+ sizeof(*get_cmd),
+ (struct ena_admin_acq_entry *)get_resp,
+ sizeof(*get_resp));
+
+ if (unlikely(ret))
+ netdev_err(ena_dev->net_device,
+ "Failed to get stats. error: %d\n", ret);
+
+ return ret;
+}
+
+int ena_com_get_eni_stats(struct ena_com_dev *ena_dev,
+ struct ena_admin_eni_stats *stats)
+{
+ struct ena_com_stats_ctx ctx;
+ int ret;
+
+ if (!ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) {
+ netdev_err(ena_dev->net_device,
+ "Capability %d isn't supported\n",
+ ENA_ADMIN_ENI_STATS);
+ return -EOPNOTSUPP;
+ }
+
+ memset(&ctx, 0x0, sizeof(ctx));
+ ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_ENI);
+ if (likely(ret == 0))
+ memcpy(stats, &ctx.get_resp.u.eni_stats,
+ sizeof(ctx.get_resp.u.eni_stats));
+
+ return ret;
+}
+
+int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev,
+ struct ena_admin_basic_stats *stats)
+{
+ struct ena_com_stats_ctx ctx;
+ int ret;
+
+ memset(&ctx, 0x0, sizeof(ctx));
+ ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_BASIC);
+ if (likely(ret == 0))
+ memcpy(stats, &ctx.get_resp.u.basic_stats,
+ sizeof(ctx.get_resp.u.basic_stats));
+
+ return ret;
+}
+
+int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, u32 mtu)
+{
+ struct ena_com_admin_queue *admin_queue;
+ struct ena_admin_set_feat_cmd cmd;
+ struct ena_admin_set_feat_resp resp;
+ int ret;
+
+ if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) {
+ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
+ ENA_ADMIN_MTU);
+ return -EOPNOTSUPP;
+ }
+
+ memset(&cmd, 0x0, sizeof(cmd));
+ admin_queue = &ena_dev->admin_queue;
+
+ cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+ cmd.aq_common_descriptor.flags = 0;
+ cmd.feat_common.feature_id = ENA_ADMIN_MTU;
+ cmd.u.mtu.mtu = mtu;
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct ena_admin_acq_entry *)&resp,
+ sizeof(resp));
+
+ if (unlikely(ret))
+ netdev_err(ena_dev->net_device,
+ "Failed to set mtu %d. error: %d\n", mtu, ret);
+
+ return ret;
+}
+
+int ena_com_get_offload_settings(struct ena_com_dev *ena_dev,
+ struct ena_admin_feature_offload_desc *offload)
+{
+ int ret;
+ struct ena_admin_get_feat_resp resp;
+
+ ret = ena_com_get_feature(ena_dev, &resp,
+ ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0);
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device,
+ "Failed to get offload capabilities %d\n", ret);
+ return ret;
+ }
+
+ memcpy(offload, &resp.u.offload, sizeof(resp.u.offload));
+
+ return 0;
+}
+
+int ena_com_set_hash_function(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_set_feat_cmd cmd;
+ struct ena_admin_set_feat_resp resp;
+ struct ena_admin_get_feat_resp get_resp;
+ int ret;
+
+ if (!ena_com_check_supported_feature_id(ena_dev,
+ ENA_ADMIN_RSS_HASH_FUNCTION)) {
+ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
+ ENA_ADMIN_RSS_HASH_FUNCTION);
+ return -EOPNOTSUPP;
+ }
+
+ /* Validate hash function is supported */
+ ret = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_RSS_HASH_FUNCTION, 0);
+ if (unlikely(ret))
+ return ret;
+
+ if (!(get_resp.u.flow_hash_func.supported_func & BIT(rss->hash_func))) {
+ netdev_err(ena_dev->net_device,
+ "Func hash %d isn't supported by device, abort\n",
+ rss->hash_func);
+ return -EOPNOTSUPP;
+ }
+
+ memset(&cmd, 0x0, sizeof(cmd));
+
+ cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+ cmd.aq_common_descriptor.flags =
+ ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+ cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_FUNCTION;
+ cmd.u.flow_hash_func.init_val = rss->hash_init_val;
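+	/* The device expects the selected function as a bit in a bitmask,
+	 * not as a raw enum value.
+	 */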
+ cmd.u.flow_hash_func.selected_func = 1 << rss->hash_func;
+
+ ret = ena_com_mem_addr_set(ena_dev,
+ &cmd.control_buffer.address,
+ rss->hash_key_dma_addr);
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device, "Memory address set failed\n");
+ return ret;
+ }
+
+ cmd.control_buffer.length = sizeof(*rss->hash_key);
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct ena_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device,
+ "Failed to set hash function %d. error: %d\n",
+ rss->hash_func, ret);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
+ enum ena_admin_hash_functions func,
+ const u8 *key, u16 key_len, u32 init_val)
+{
+ struct ena_admin_feature_rss_flow_hash_control *hash_key;
+ struct ena_admin_get_feat_resp get_resp;
+ enum ena_admin_hash_functions old_func;
+ struct ena_rss *rss = &ena_dev->rss;
+ int rc;
+
+ hash_key = rss->hash_key;
+
+	/* Make sure the size is a multiple of DWORDs */
+ if (unlikely(key_len & 0x3))
+ return -EINVAL;
+
+ rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+ ENA_ADMIN_RSS_HASH_FUNCTION,
+ rss->hash_key_dma_addr,
+ sizeof(*rss->hash_key), 0);
+ if (unlikely(rc))
+ return rc;
+
+ if (!(BIT(func) & get_resp.u.flow_hash_func.supported_func)) {
+ netdev_err(ena_dev->net_device,
+ "Flow hash function %d isn't supported\n", func);
+ return -EOPNOTSUPP;
+ }
+
+ if ((func == ENA_ADMIN_TOEPLITZ) && key) {
+ if (key_len != sizeof(hash_key->key)) {
+ netdev_err(ena_dev->net_device,
+ "key len (%u) doesn't equal the supported size (%zu)\n",
+ key_len, sizeof(hash_key->key));
+ return -EINVAL;
+ }
+ memcpy(hash_key->key, key, key_len);
+ hash_key->key_parts = key_len / sizeof(hash_key->key[0]);
+ }
+
+ rss->hash_init_val = init_val;
+ old_func = rss->hash_func;
+ rss->hash_func = func;
+ rc = ena_com_set_hash_function(ena_dev);
+
+ /* Restore the old function */
+ if (unlikely(rc))
+ rss->hash_func = old_func;
+
+ return rc;
+}
+
+int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
+ enum ena_admin_hash_functions *func)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_get_feat_resp get_resp;
+ int rc;
+
+ if (unlikely(!func))
+ return -EINVAL;
+
+ rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+ ENA_ADMIN_RSS_HASH_FUNCTION,
+ rss->hash_key_dma_addr,
+ sizeof(*rss->hash_key), 0);
+ if (unlikely(rc))
+ return rc;
+
+ /* ffs() returns 1 in case the lsb is set */
+ rss->hash_func = ffs(get_resp.u.flow_hash_func.selected_func);
+ if (rss->hash_func)
+ rss->hash_func--;
+
+ *func = rss->hash_func;
+
+ return 0;
+}
+
+int ena_com_get_hash_key(struct ena_com_dev *ena_dev, u8 *key)
+{
+ struct ena_admin_feature_rss_flow_hash_control *hash_key =
+ ena_dev->rss.hash_key;
+
+ if (key)
+ memcpy(key, hash_key->key,
+ (size_t)(hash_key->key_parts) * sizeof(hash_key->key[0]));
+
+ return 0;
+}
+
+int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev,
+ enum ena_admin_flow_hash_proto proto,
+ u16 *fields)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_get_feat_resp get_resp;
+ int rc;
+
+ rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+ ENA_ADMIN_RSS_HASH_INPUT,
+ rss->hash_ctrl_dma_addr,
+ sizeof(*rss->hash_ctrl), 0);
+ if (unlikely(rc))
+ return rc;
+
+ if (fields)
+ *fields = rss->hash_ctrl->selected_fields[proto].fields;
+
+ return 0;
+}
+
+int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl;
+ struct ena_admin_set_feat_cmd cmd;
+ struct ena_admin_set_feat_resp resp;
+ int ret;
+
+ if (!ena_com_check_supported_feature_id(ena_dev,
+ ENA_ADMIN_RSS_HASH_INPUT)) {
+ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
+ ENA_ADMIN_RSS_HASH_INPUT);
+ return -EOPNOTSUPP;
+ }
+
+ memset(&cmd, 0x0, sizeof(cmd));
+
+ cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+ cmd.aq_common_descriptor.flags =
+ ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+ cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_INPUT;
+ cmd.u.flow_hash_input.enabled_input_sort =
+ ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK |
+ ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK;
+
+ ret = ena_com_mem_addr_set(ena_dev,
+ &cmd.control_buffer.address,
+ rss->hash_ctrl_dma_addr);
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device, "Memory address set failed\n");
+ return ret;
+ }
+ cmd.control_buffer.length = sizeof(*hash_ctrl);
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct ena_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (unlikely(ret))
+ netdev_err(ena_dev->net_device,
+ "Failed to set hash input. error: %d\n", ret);
+
+ return ret;
+}
+
+int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_feature_rss_hash_control *hash_ctrl =
+ rss->hash_ctrl;
+ u16 available_fields = 0;
+ int rc, i;
+
+ /* Get the supported hash input */
+ rc = ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+ if (unlikely(rc))
+ return rc;
+
+ hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP4].fields =
+ ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+ ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+ hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP4].fields =
+ ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+ ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+ hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP6].fields =
+ ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+ ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+ hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP6].fields =
+ ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+ ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+ hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4].fields =
+ ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+ hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP6].fields =
+ ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+ hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields =
+ ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+ hash_ctrl->selected_fields[ENA_ADMIN_RSS_NOT_IP].fields =
+ ENA_ADMIN_RSS_L2_DA | ENA_ADMIN_RSS_L2_SA;
+
+ for (i = 0; i < ENA_ADMIN_RSS_PROTO_NUM; i++) {
+ available_fields = hash_ctrl->selected_fields[i].fields &
+ hash_ctrl->supported_fields[i].fields;
+ if (available_fields != hash_ctrl->selected_fields[i].fields) {
+ netdev_err(ena_dev->net_device,
+				   "Hash control doesn't support all the desired configuration. proto %x supported %x selected %x\n",
+ i, hash_ctrl->supported_fields[i].fields,
+ hash_ctrl->selected_fields[i].fields);
+ return -EOPNOTSUPP;
+ }
+ }
+
+ rc = ena_com_set_hash_ctrl(ena_dev);
+
+ /* In case of failure, restore the old hash ctrl */
+ if (unlikely(rc))
+ ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+
+ return rc;
+}
+
+int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev,
+ enum ena_admin_flow_hash_proto proto,
+ u16 hash_fields)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl;
+ u16 supported_fields;
+ int rc;
+
+ if (proto >= ENA_ADMIN_RSS_PROTO_NUM) {
+ netdev_err(ena_dev->net_device, "Invalid proto num (%u)\n",
+ proto);
+ return -EINVAL;
+ }
+
+ /* Get the ctrl table */
+ rc = ena_com_get_hash_ctrl(ena_dev, proto, NULL);
+ if (unlikely(rc))
+ return rc;
+
+ /* Make sure all the fields are supported */
+ supported_fields = hash_ctrl->supported_fields[proto].fields;
+ if ((hash_fields & supported_fields) != hash_fields) {
+ netdev_err(ena_dev->net_device,
+ "Proto %d doesn't support the required fields %x. supports only: %x\n",
+ proto, hash_fields, supported_fields);
+ }
+
+ hash_ctrl->selected_fields[proto].fields = hash_fields;
+
+ rc = ena_com_set_hash_ctrl(ena_dev);
+
+ /* In case of failure, restore the old hash ctrl */
+ if (unlikely(rc))
+ ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+
+	return rc;
+}
+
+int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev,
+ u16 entry_idx, u16 entry_value)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+
+ if (unlikely(entry_idx >= (1 << rss->tbl_log_size)))
+ return -EINVAL;
+
+	if (unlikely(entry_value > ENA_TOTAL_NUM_QUEUES))
+ return -EINVAL;
+
+ rss->host_rss_ind_tbl[entry_idx] = entry_value;
+
+ return 0;
+}
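+
+/* Example usage of ena_com_indirect_table_fill_entry() (a hypothetical
+ * caller sketch, not code from this driver): a round-robin spread over
+ * num_queues Rx queues could be built with
+ *
+ *	for (i = 0; i < (1 << log_size); i++)
+ *		ena_com_indirect_table_fill_entry(ena_dev, i, i % num_queues);
+ */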
+
+int ena_com_indirect_table_set(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+ struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_set_feat_cmd cmd;
+ struct ena_admin_set_feat_resp resp;
+ int ret;
+
+ if (!ena_com_check_supported_feature_id(
+ ena_dev, ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG)) {
+ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n",
+ ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG);
+ return -EOPNOTSUPP;
+ }
+
+ ret = ena_com_ind_tbl_convert_to_device(ena_dev);
+ if (ret) {
+ netdev_err(ena_dev->net_device,
+ "Failed to convert host indirection table to device table\n");
+ return ret;
+ }
+
+ memset(&cmd, 0x0, sizeof(cmd));
+
+ cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+ cmd.aq_common_descriptor.flags =
+ ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+ cmd.feat_common.feature_id = ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG;
+ cmd.u.ind_table.size = rss->tbl_log_size;
+ cmd.u.ind_table.inline_index = 0xFFFFFFFF;
+
+ ret = ena_com_mem_addr_set(ena_dev,
+ &cmd.control_buffer.address,
+ rss->rss_ind_tbl_dma_addr);
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device, "Memory address set failed\n");
+ return ret;
+ }
+
+ cmd.control_buffer.length = (1ULL << rss->tbl_log_size) *
+ sizeof(struct ena_admin_rss_ind_table_entry);
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct ena_admin_acq_entry *)&resp,
+ sizeof(resp));
+
+ if (unlikely(ret))
+ netdev_err(ena_dev->net_device,
+ "Failed to set indirect table. error: %d\n", ret);
+
+ return ret;
+}
+
+int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl)
+{
+ struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_get_feat_resp get_resp;
+ u32 tbl_size;
+ int i, rc;
+
+ tbl_size = (1ULL << rss->tbl_log_size) *
+ sizeof(struct ena_admin_rss_ind_table_entry);
+
+ rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+ ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG,
+ rss->rss_ind_tbl_dma_addr,
+ tbl_size, 0);
+ if (unlikely(rc))
+ return rc;
+
+ if (!ind_tbl)
+ return 0;
+
+ for (i = 0; i < (1 << rss->tbl_log_size); i++)
+ ind_tbl[i] = rss->host_rss_ind_tbl[i];
+
+ return 0;
+}
+
+int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size)
+{
+ int rc;
+
+ memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss));
+
+ rc = ena_com_indirect_table_allocate(ena_dev, indr_tbl_log_size);
+ if (unlikely(rc))
+ goto err_indr_tbl;
+
+	/* The following function might return -EOPNOTSUPP in case the
+	 * device doesn't support setting the key / hash function. We can
+	 * safely ignore this error and have indirection table support only.
+	 */
+ rc = ena_com_hash_key_allocate(ena_dev);
+ if (likely(!rc))
+ ena_com_hash_key_fill_default_key(ena_dev);
+ else if (rc != -EOPNOTSUPP)
+ goto err_hash_key;
+
+ rc = ena_com_hash_ctrl_init(ena_dev);
+ if (unlikely(rc))
+ goto err_hash_ctrl;
+
+ return 0;
+
+err_hash_ctrl:
+ ena_com_hash_key_destroy(ena_dev);
+err_hash_key:
+ ena_com_indirect_table_destroy(ena_dev);
+err_indr_tbl:
+
+ return rc;
+}
+
+void ena_com_rss_destroy(struct ena_com_dev *ena_dev)
+{
+ ena_com_indirect_table_destroy(ena_dev);
+ ena_com_hash_key_destroy(ena_dev);
+ ena_com_hash_ctrl_destroy(ena_dev);
+
+ memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss));
+}
+
+int ena_com_allocate_host_info(struct ena_com_dev *ena_dev)
+{
+ struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+ host_attr->host_info =
+ dma_alloc_coherent(ena_dev->dmadev, SZ_4K,
+ &host_attr->host_info_dma_addr, GFP_KERNEL);
+ if (unlikely(!host_attr->host_info))
+ return -ENOMEM;
+
+ host_attr->host_info->ena_spec_version = ((ENA_COMMON_SPEC_VERSION_MAJOR <<
+ ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) |
+ (ENA_COMMON_SPEC_VERSION_MINOR));
+
+ return 0;
+}
+
+int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev,
+ u32 debug_area_size)
+{
+ struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+ host_attr->debug_area_virt_addr =
+ dma_alloc_coherent(ena_dev->dmadev, debug_area_size,
+ &host_attr->debug_area_dma_addr, GFP_KERNEL);
+ if (unlikely(!host_attr->debug_area_virt_addr)) {
+ host_attr->debug_area_size = 0;
+ return -ENOMEM;
+ }
+
+ host_attr->debug_area_size = debug_area_size;
+
+ return 0;
+}
+
+void ena_com_delete_host_info(struct ena_com_dev *ena_dev)
+{
+ struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+ if (host_attr->host_info) {
+ dma_free_coherent(ena_dev->dmadev, SZ_4K, host_attr->host_info,
+ host_attr->host_info_dma_addr);
+ host_attr->host_info = NULL;
+ }
+}
+
+void ena_com_delete_debug_area(struct ena_com_dev *ena_dev)
+{
+ struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+ if (host_attr->debug_area_virt_addr) {
+ dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size,
+ host_attr->debug_area_virt_addr,
+ host_attr->debug_area_dma_addr);
+ host_attr->debug_area_virt_addr = NULL;
+ }
+}
+
+int ena_com_set_host_attributes(struct ena_com_dev *ena_dev)
+{
+ struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+ struct ena_com_admin_queue *admin_queue;
+ struct ena_admin_set_feat_cmd cmd;
+ struct ena_admin_set_feat_resp resp;
+
+ int ret;
+
+ /* Host attribute config is called before ena_com_get_dev_attr_feat
+ * so ena_com can't check if the feature is supported.
+ */
+
+ memset(&cmd, 0x0, sizeof(cmd));
+ admin_queue = &ena_dev->admin_queue;
+
+ cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+ cmd.feat_common.feature_id = ENA_ADMIN_HOST_ATTR_CONFIG;
+
+ ret = ena_com_mem_addr_set(ena_dev,
+ &cmd.u.host_attr.debug_ba,
+ host_attr->debug_area_dma_addr);
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device, "Memory address set failed\n");
+ return ret;
+ }
+
+ ret = ena_com_mem_addr_set(ena_dev,
+ &cmd.u.host_attr.os_info_ba,
+ host_attr->host_info_dma_addr);
+ if (unlikely(ret)) {
+ netdev_err(ena_dev->net_device, "Memory address set failed\n");
+ return ret;
+ }
+
+ cmd.u.host_attr.debug_area_size = host_attr->debug_area_size;
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct ena_admin_acq_entry *)&resp,
+ sizeof(resp));
+
+ if (unlikely(ret))
+ netdev_err(ena_dev->net_device,
+ "Failed to set host attributes: %d\n", ret);
+
+ return ret;
+}
+
+/* Interrupt moderation */
+bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev)
+{
+ return ena_com_check_supported_feature_id(ena_dev,
+ ENA_ADMIN_INTERRUPT_MODERATION);
+}
+
+static int ena_com_update_nonadaptive_moderation_interval(struct ena_com_dev *ena_dev,
+ u32 coalesce_usecs,
+ u32 intr_delay_resolution,
+ u32 *intr_moder_interval)
+{
+ if (!intr_delay_resolution) {
+ netdev_err(ena_dev->net_device,
+ "Illegal interrupt delay granularity value\n");
+ return -EFAULT;
+ }
+
+ *intr_moder_interval = coalesce_usecs / intr_delay_resolution;
+
+ return 0;
+}
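+
+/* Worked example for the conversion above: with coalesce_usecs = 64 and a
+ * device-reported intr_delay_resolution of 4 usec, the programmed interval
+ * is 64 / 4 = 16 device units; a coalesce value smaller than the
+ * resolution truncates to 0 (no moderation).
+ */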
+
+int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
+ u32 tx_coalesce_usecs)
+{
+ return ena_com_update_nonadaptive_moderation_interval(ena_dev,
+ tx_coalesce_usecs,
+ ena_dev->intr_delay_resolution,
+ &ena_dev->intr_moder_tx_interval);
+}
+
+int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
+ u32 rx_coalesce_usecs)
+{
+ return ena_com_update_nonadaptive_moderation_interval(ena_dev,
+ rx_coalesce_usecs,
+ ena_dev->intr_delay_resolution,
+ &ena_dev->intr_moder_rx_interval);
+}
+
+int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev)
+{
+ struct ena_admin_get_feat_resp get_resp;
+ u16 delay_resolution;
+ int rc;
+
+ rc = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_INTERRUPT_MODERATION, 0);
+
+ if (rc) {
+ if (rc == -EOPNOTSUPP) {
+ netdev_dbg(ena_dev->net_device,
+ "Feature %d isn't supported\n",
+ ENA_ADMIN_INTERRUPT_MODERATION);
+ rc = 0;
+ } else {
+ netdev_err(ena_dev->net_device,
+ "Failed to get interrupt moderation admin cmd. rc: %d\n",
+ rc);
+ }
+
+ /* no moderation supported, disable adaptive support */
+ ena_com_disable_adaptive_moderation(ena_dev);
+ return rc;
+ }
+
+	/* Moderation is supported by the device; save its delay resolution */
+ delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution;
+ ena_com_update_intr_delay_resolution(ena_dev, delay_resolution);
+
+ /* Disable adaptive moderation by default - can be enabled later */
+ ena_com_disable_adaptive_moderation(ena_dev);
+
+ return 0;
+}
+
+unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev)
+{
+ return ena_dev->intr_moder_tx_interval;
+}
+
+unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev)
+{
+ return ena_dev->intr_moder_rx_interval;
+}
+
+int ena_com_config_dev_mode(struct ena_com_dev *ena_dev,
+ struct ena_admin_feature_llq_desc *llq_features,
+ struct ena_llq_configurations *llq_default_cfg)
+{
+ struct ena_com_llq_info *llq_info = &ena_dev->llq_info;
+ int rc;
+
+ if (!llq_features->max_llq_num) {
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ return 0;
+ }
+
+ rc = ena_com_config_llq_info(ena_dev, llq_features, llq_default_cfg);
+ if (rc)
+ return rc;
+
+ ena_dev->tx_max_header_size = llq_info->desc_list_entry_size -
+ (llq_info->descs_num_before_header * sizeof(struct ena_eth_io_tx_desc));
+
+ if (unlikely(ena_dev->tx_max_header_size == 0)) {
+ netdev_err(ena_dev->net_device,
+ "The size of the LLQ entry is smaller than needed\n");
+ return -EINVAL;
+ }
+
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
+
+ return 0;
+}
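+
+/* Worked example for the header-size computation above: with a 128 B LLQ
+ * entry and 2 descriptors placed before the header (2 * 16 B of
+ * struct ena_eth_io_tx_desc = 32 B), tx_max_header_size comes out to
+ * 128 - 32 = 96 B.
+ */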
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
new file mode 100644
index 0000000000..3c5081d9d2
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -0,0 +1,1025 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef ENA_COM
+#define ENA_COM
+
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/prefetch.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <linux/netdevice.h>
+
+#include "ena_common_defs.h"
+#include "ena_admin_defs.h"
+#include "ena_eth_io_defs.h"
+#include "ena_regs_defs.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define ENA_MAX_NUM_IO_QUEUES 128U
+/* We need two queues for each IO (one for Tx and one for Rx) */
+#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES))
+
+#define ENA_MAX_HANDLERS 256
+
+#define ENA_MAX_PHYS_ADDR_SIZE_BITS 48
+
+/* Unit in usec */
+#define ENA_REG_READ_TIMEOUT 200000
+
+#define ADMIN_SQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aq_entry))
+#define ADMIN_CQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_acq_entry))
+#define ADMIN_AENQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aenq_entry))
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* ENA adaptive interrupt moderation settings */
+
+#define ENA_INTR_INITIAL_TX_INTERVAL_USECS 64
+#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 0
+#define ENA_DEFAULT_INTR_DELAY_RESOLUTION 1
+
+#define ENA_HASH_KEY_SIZE 40
+
+#define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF
+
+#define ENA_FEATURE_MAX_QUEUE_EXT_VER 1
+
+struct ena_llq_configurations {
+ enum ena_admin_llq_header_location llq_header_location;
+ enum ena_admin_llq_ring_entry_size llq_ring_entry_size;
+ enum ena_admin_llq_stride_ctrl llq_stride_ctrl;
+ enum ena_admin_llq_num_descs_before_header llq_num_decs_before_header;
+ u16 llq_ring_entry_size_value;
+};
+
+enum queue_direction {
+ ENA_COM_IO_QUEUE_DIRECTION_TX,
+ ENA_COM_IO_QUEUE_DIRECTION_RX
+};
+
+struct ena_com_buf {
+ dma_addr_t paddr; /**< Buffer physical address */
+ u16 len; /**< Buffer length in bytes */
+};
+
+struct ena_com_rx_buf_info {
+ u16 len;
+ u16 req_id;
+};
+
+struct ena_com_io_desc_addr {
+ u8 __iomem *pbuf_dev_addr; /* LLQ address */
+ u8 *virt_addr;
+ dma_addr_t phys_addr;
+};
+
+struct ena_com_tx_meta {
+ u16 mss;
+ u16 l3_hdr_len;
+ u16 l3_hdr_offset;
+ u16 l4_hdr_len; /* In words */
+};
+
+struct ena_com_llq_info {
+ u16 header_location_ctrl;
+ u16 desc_stride_ctrl;
+ u16 desc_list_entry_size_ctrl;
+ u16 desc_list_entry_size;
+ u16 descs_num_before_header;
+ u16 descs_per_entry;
+ u16 max_entries_in_tx_burst;
+ bool disable_meta_caching;
+};
+
+struct ena_com_io_cq {
+ struct ena_com_io_desc_addr cdesc_addr;
+
+ /* Interrupt unmask register */
+ u32 __iomem *unmask_reg;
+
+ /* The completion queue head doorbell register */
+ u32 __iomem *cq_head_db_reg;
+
+ /* numa configuration register (for TPH) */
+ u32 __iomem *numa_node_cfg_reg;
+
+ /* The value to write to the above register to unmask
+ * the interrupt of this queue
+ */
+ u32 msix_vector;
+
+ enum queue_direction direction;
+
+ /* holds the number of cdesc of the current packet */
+ u16 cur_rx_pkt_cdesc_count;
+ /* save the first cdesc idx of the current packet */
+ u16 cur_rx_pkt_cdesc_start_idx;
+
+ u16 q_depth;
+ /* Caller qid */
+ u16 qid;
+
+ /* Device queue index */
+ u16 idx;
+ u16 head;
+ u16 last_head_update;
+ u8 phase;
+ u8 cdesc_entry_size_in_bytes;
+
+} ____cacheline_aligned;
+
+struct ena_com_io_bounce_buffer_control {
+ u8 *base_buffer;
+ u16 next_to_use;
+ u16 buffer_size;
+ u16 buffers_num; /* Must be a power of 2 */
+};
+
+/* This struct is to keep tracking the current location of the next llq entry */
+struct ena_com_llq_pkt_ctrl {
+ u8 *curr_bounce_buf;
+ u16 idx;
+ u16 descs_left_in_line;
+};
+
+struct ena_com_io_sq {
+ struct ena_com_io_desc_addr desc_addr;
+
+ u32 __iomem *db_addr;
+ u8 __iomem *header_addr;
+
+ enum queue_direction direction;
+ enum ena_admin_placement_policy_type mem_queue_type;
+
+ bool disable_meta_caching;
+
+ u32 msix_vector;
+ struct ena_com_tx_meta cached_tx_meta;
+ struct ena_com_llq_info llq_info;
+ struct ena_com_llq_pkt_ctrl llq_buf_ctrl;
+ struct ena_com_io_bounce_buffer_control bounce_buf_ctrl;
+
+ u16 q_depth;
+ u16 qid;
+
+ u16 idx;
+ u16 tail;
+ u16 next_to_comp;
+ u16 llq_last_copy_tail;
+ u32 tx_max_header_size;
+ u8 phase;
+ u8 desc_entry_size;
+ u8 dma_addr_bits;
+ u16 entries_in_tx_burst_left;
+} ____cacheline_aligned;
+
+struct ena_com_admin_cq {
+ struct ena_admin_acq_entry *entries;
+ dma_addr_t dma_addr;
+
+ u16 head;
+ u8 phase;
+};
+
+struct ena_com_admin_sq {
+ struct ena_admin_aq_entry *entries;
+ dma_addr_t dma_addr;
+
+ u32 __iomem *db_addr;
+
+ u16 head;
+ u16 tail;
+ u8 phase;
+
+};
+
+struct ena_com_stats_admin {
+ u64 aborted_cmd;
+ u64 submitted_cmd;
+ u64 completed_cmd;
+ u64 out_of_space;
+ u64 no_completion;
+};
+
+struct ena_com_admin_queue {
+ void *q_dmadev;
+ struct ena_com_dev *ena_dev;
+ spinlock_t q_lock; /* spinlock for the admin queue */
+
+ struct ena_comp_ctx *comp_ctx;
+ u32 completion_timeout;
+ u16 q_depth;
+ struct ena_com_admin_cq cq;
+ struct ena_com_admin_sq sq;
+
+ /* Indicate if the admin queue should poll for completion */
+ bool polling;
+
+ /* Define if fallback to polling mode should occur */
+ bool auto_polling;
+
+ u16 curr_cmd_id;
+
+ /* Indicate that the ena was initialized and can
+ * process new admin commands
+ */
+ bool running_state;
+
+ /* Count the number of outstanding admin commands */
+ atomic_t outstanding_cmds;
+
+ struct ena_com_stats_admin stats;
+};
+
+struct ena_aenq_handlers;
+
+struct ena_com_aenq {
+ u16 head;
+ u8 phase;
+ struct ena_admin_aenq_entry *entries;
+ dma_addr_t dma_addr;
+ u16 q_depth;
+ struct ena_aenq_handlers *aenq_handlers;
+};
+
+struct ena_com_mmio_read {
+ struct ena_admin_ena_mmio_req_read_less_resp *read_resp;
+ dma_addr_t read_resp_dma_addr;
+ u32 reg_read_to; /* in us */
+ u16 seq_num;
+ bool readless_supported;
+ /* spin lock to ensure a single outstanding read */
+ spinlock_t lock;
+};
+
+struct ena_rss {
+ /* Indirect table */
+ u16 *host_rss_ind_tbl;
+ struct ena_admin_rss_ind_table_entry *rss_ind_tbl;
+ dma_addr_t rss_ind_tbl_dma_addr;
+ u16 tbl_log_size;
+
+ /* Hash key */
+ enum ena_admin_hash_functions hash_func;
+ struct ena_admin_feature_rss_flow_hash_control *hash_key;
+ dma_addr_t hash_key_dma_addr;
+ u32 hash_init_val;
+
+ /* Flow Control */
+ struct ena_admin_feature_rss_hash_control *hash_ctrl;
+ dma_addr_t hash_ctrl_dma_addr;
+
+};
+
+struct ena_host_attribute {
+ /* Debug area */
+ u8 *debug_area_virt_addr;
+ dma_addr_t debug_area_dma_addr;
+ u32 debug_area_size;
+
+ /* Host information */
+ struct ena_admin_host_info *host_info;
+ dma_addr_t host_info_dma_addr;
+};
+
+/* Each ena_dev is a PCI function. */
+struct ena_com_dev {
+ struct ena_com_admin_queue admin_queue;
+ struct ena_com_aenq aenq;
+ struct ena_com_io_cq io_cq_queues[ENA_TOTAL_NUM_QUEUES];
+ struct ena_com_io_sq io_sq_queues[ENA_TOTAL_NUM_QUEUES];
+ u8 __iomem *reg_bar;
+ void __iomem *mem_bar;
+ void *dmadev;
+ struct net_device *net_device;
+
+ enum ena_admin_placement_policy_type tx_mem_queue_type;
+ u32 tx_max_header_size;
+ u16 stats_func; /* Selected function for extended statistic dump */
+ u16 stats_queue; /* Selected queue for extended statistic dump */
+
+ struct ena_com_mmio_read mmio_read;
+
+ struct ena_rss rss;
+ u32 supported_features;
+ u32 capabilities;
+ u32 dma_addr_bits;
+
+ struct ena_host_attribute host_attr;
+ bool adaptive_coalescing;
+ u16 intr_delay_resolution;
+
+ /* interrupt moderation intervals are in usec divided by
+ * intr_delay_resolution, which is supplied by the device.
+ */
+ u32 intr_moder_tx_interval;
+ u32 intr_moder_rx_interval;
+
+ struct ena_intr_moder_entry *intr_moder_tbl;
+
+ struct ena_com_llq_info llq_info;
+
+ u32 ena_min_poll_delay_us;
+};
+
+struct ena_com_dev_get_features_ctx {
+ struct ena_admin_queue_feature_desc max_queues;
+ struct ena_admin_queue_ext_feature_desc max_queue_ext;
+ struct ena_admin_device_attr_feature_desc dev_attr;
+ struct ena_admin_feature_aenq_desc aenq;
+ struct ena_admin_feature_offload_desc offload;
+ struct ena_admin_ena_hw_hints hw_hints;
+ struct ena_admin_feature_llq_desc llq;
+};
+
+struct ena_com_create_io_ctx {
+ enum ena_admin_placement_policy_type mem_queue_type;
+ enum queue_direction direction;
+ int numa_node;
+ u32 msix_vector;
+ u16 queue_size;
+ u16 qid;
+};
+
+typedef void (*ena_aenq_handler)(void *data,
+ struct ena_admin_aenq_entry *aenq_e);
+
+/* Holds aenq handlers. Indexed by AENQ event group */
+struct ena_aenq_handlers {
+ ena_aenq_handler handlers[ENA_MAX_HANDLERS];
+ ena_aenq_handler unimplemented_handler;
+};
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* ena_com_mmio_reg_read_request_init - Init the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ *
+ * Initialize the register read mechanism.
+ *
+ * @note: This method must be the first stage in the initialization sequence.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_mmio_read_mode - Enable/disable the indirect mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ * @readless_supported: readless mode (enable/disable)
+ */
+void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev,
+ bool readless_supported);
+
+/* ena_com_mmio_reg_read_request_write_dev_addr - Write the mmio reg read return
+ * value physical address.
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev);
+
+/* ena_com_mmio_reg_read_request_destroy - Destroy the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_admin_init - Init the admin and the async queues
+ * @ena_dev: ENA communication layer struct
+ * @aenq_handlers: Those handlers to be called upon event.
+ *
+ * Initialize the admin submission and completion queues.
+ * Initialize the asynchronous events notification queues.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_admin_init(struct ena_com_dev *ena_dev,
+ struct ena_aenq_handlers *aenq_handlers);
+
+/* ena_com_admin_destroy - Destroy the admin and the async events queues.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @note: Before calling this method, the caller must validate that the device
+ * won't send any additional admin completions/aenq.
+ * To achieve that, a FLR is recommended.
+ */
+void ena_com_admin_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_dev_reset - Perform a device FLR.
+ * @ena_dev: ENA communication layer struct
+ * @reset_reason: Specify the trigger for the reset in case of an error.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_dev_reset(struct ena_com_dev *ena_dev,
+ enum ena_regs_reset_reason_types reset_reason);
+
+/* ena_com_create_io_queue - Create io queue.
+ * @ena_dev: ENA communication layer struct
+ * @ctx - create context structure
+ *
+ * Create the submission and the completion queues.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
+ struct ena_com_create_io_ctx *ctx);
+
+/* ena_com_destroy_io_queue - Destroy IO queue with the queue id - qid.
+ * @ena_dev: ENA communication layer struct
+ * @qid - the caller virtual queue id.
+ */
+void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid);
+
+/* ena_com_get_io_handlers - Return the io queue handlers
+ * @ena_dev: ENA communication layer struct
+ * @qid - the caller virtual queue id.
+ * @io_sq - IO submission queue handler
+ * @io_cq - IO completion queue handler.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid,
+ struct ena_com_io_sq **io_sq,
+ struct ena_com_io_cq **io_cq);
+
+/* ena_com_admin_aenq_enable - Enable asynchronous event notifications
+ * @ena_dev: ENA communication layer struct
+ *
+ * After this method, asynchronous events can be received via the AENQ.
+ */
+void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_admin_running_state - Set the state of the admin queue
+ * @ena_dev: ENA communication layer struct
+ *
+ * Change the state of the admin queue (enable/disable)
+ */
+void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state);
+
+/* ena_com_get_admin_running_state - Get the admin queue state
+ * @ena_dev: ENA communication layer struct
+ *
+ * Retrieve the state of the admin queue (enable/disable)
+ *
+ * @return - current admin queue running state (enable/disable)
+ */
+bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_admin_polling_mode - Set the admin completion queue polling mode
+ * @ena_dev: ENA communication layer struct
+ * @polling: Enable/Disable polling mode
+ *
+ * Set the admin completion mode.
+ */
+void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling);
+
+/* ena_com_set_admin_auto_polling_mode - Enable autoswitch to polling mode
+ * @ena_dev: ENA communication layer struct
+ * @polling: Enable/Disable polling mode
+ *
+ * Set the autopolling mode.
+ * If autopolling is enabled, the driver falls back to polling mode
+ * whenever an interrupt is missed while a completion is pending.
+ */
+void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev,
+ bool polling);
+
+/* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method goes over the admin completion queue and wakes up all the pending
+ * threads that wait on the commands wait event.
+ *
+ * @note: Should be called after MSI-X interrupt.
+ */
+void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev);
+
+/* ena_com_aenq_intr_handler - AENQ interrupt handler
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method goes over the async event notification queue and calls the proper
+ * aenq handler.
+ */
+void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data);
+
+/* ena_com_abort_admin_commands - Abort all the outstanding admin commands.
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method aborts all the outstanding admin commands.
+ * The caller should then call ena_com_wait_for_abort_completion to make sure
+ * all the commands were completed.
+ */
+void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev);
+
+/* ena_com_wait_for_abort_completion - Wait for admin commands abort.
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method waits until all the outstanding admin commands are completed.
+ */
+void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev);
+
+/* ena_com_validate_version - Validate the device parameters
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method verifies the device parameters are the same as the saved
+ * parameters in ena_dev.
+ * This method is useful after device reset, to validate the device mac address
+ * and the device offloads are the same as before the reset.
+ *
+ * @return - 0 on success negative value otherwise.
+ */
+int ena_com_validate_version(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_link_params - Retrieve physical link parameters.
+ * @ena_dev: ENA communication layer struct
+ * @resp: Link parameters
+ *
+ * Retrieve the physical link parameters,
+ * like speed, auto-negotiation and full duplex support.
+ *
+ * @return - 0 on Success negative value otherwise.
+ */
+int ena_com_get_link_params(struct ena_com_dev *ena_dev,
+ struct ena_admin_get_feat_resp *resp);
+
+/* ena_com_get_dma_width - Retrieve physical dma address width the device
+ * supports.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Retrieve the maximum physical address bits the device can handle.
+ *
+ * @return: > 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dma_width(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_aenq_config - Set aenq groups configurations
+ * @ena_dev: ENA communication layer struct
+ * @groups_flag: bit field of enum ena_admin_aenq_group flags.
+ *
+ * Configure which aenq event group the driver would like to receive.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag);
+
+/* ena_com_get_dev_attr_feat - Get device features
+ * @ena_dev: ENA communication layer struct
+ * @get_feat_ctx: returned context that contains the get features.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
+ struct ena_com_dev_get_features_ctx *get_feat_ctx);
+
+/* ena_com_get_dev_basic_stats - Get device basic statistics
+ * @ena_dev: ENA communication layer struct
+ * @stats: stats return value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev,
+ struct ena_admin_basic_stats *stats);
+
+/* ena_com_get_eni_stats - Get extended network interface statistics
+ * @ena_dev: ENA communication layer struct
+ * @stats: stats return value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_eni_stats(struct ena_com_dev *ena_dev,
+ struct ena_admin_eni_stats *stats);
+
+/* ena_com_set_dev_mtu - Configure the device mtu.
+ * @ena_dev: ENA communication layer struct
+ * @mtu: mtu value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, u32 mtu);
+
+/* ena_com_get_offload_settings - Retrieve the device offloads capabilities
+ * @ena_dev: ENA communication layer struct
+ * @offload: offload return value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_offload_settings(struct ena_com_dev *ena_dev,
+ struct ena_admin_feature_offload_desc *offload);
+
+/* ena_com_rss_init - Init RSS
+ * @ena_dev: ENA communication layer struct
+ * @log_size: indirection log size
+ *
+ * Allocate RSS/RFS resources.
+ * The caller then can configure rss using ena_com_set_hash_function,
+ * ena_com_set_hash_ctrl and ena_com_indirect_table_set.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size);
+
+/* ena_com_rss_destroy - Destroy rss
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free all the RSS/RFS resources.
+ */
+void ena_com_rss_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_current_hash_function - Get RSS hash function
+ * @ena_dev: ENA communication layer struct
+ *
+ * Return the current hash function.
+ * @return: 0 or one of the ena_admin_hash_functions values.
+ */
+int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev);
+
+/* ena_com_fill_hash_function - Fill RSS hash function
+ * @ena_dev: ENA communication layer struct
+ * @func: The hash function (Toeplitz or crc)
+ * @key: Hash key (for toeplitz hash)
+ * @key_len: key length (max length 10 DW)
+ * @init_val: initial value for the hash function
+ *
+ * Fill the ena_dev resources with the desired hash function, hash key, key_len
+ * and key initial value (if needed by the hash function).
+ * To flush the key into the device the caller should call
+ * ena_com_set_hash_function.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
+ enum ena_admin_hash_functions func,
+ const u8 *key, u16 key_len, u32 init_val);
+
+/* ena_com_set_hash_function - Flush the hash function and its dependencies to
+ * the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the hash function and its dependencies (key, key length and
+ * initial value) if needed.
+ *
+ * @note: Prior to this method the caller should call ena_com_fill_hash_function
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_hash_function(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_hash_function - Retrieve the hash function from the device.
+ * @ena_dev: ENA communication layer struct
+ * @func: hash function
+ *
+ * Retrieve the hash function from the device.
+ *
+ * @note: If the caller called ena_com_fill_hash_function but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
+ enum ena_admin_hash_functions *func);
+
+/* ena_com_get_hash_key - Retrieve the hash key
+ * @ena_dev: ENA communication layer struct
+ * @key: hash key
+ *
+ * Retrieve the hash key.
+ *
+ * @note: If the caller called ena_com_fill_hash_key but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_key(struct ena_com_dev *ena_dev, u8 *key);
+
+/* ena_com_fill_hash_ctrl - Fill RSS hash control
+ * @ena_dev: ENA communication layer struct.
+ * @proto: The protocol to configure.
+ * @hash_fields: bit mask of ena_admin_flow_hash_fields
+ *
+ * Fill the ena_dev resources with the desired hash control (the ethernet
+ * fields that take part of the hash) for a specific protocol.
+ * To flush the hash control to the device, the caller should call
+ * ena_com_set_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev,
+ enum ena_admin_flow_hash_proto proto,
+ u16 hash_fields);
+
+/* ena_com_set_hash_ctrl - Flush the hash control resources to the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the hash control (the ethernet fields that take part of the hash)
+ *
+ * @note: Prior to this method the caller should call ena_com_fill_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_hash_ctrl - Retrieve the hash control from the device.
+ * @ena_dev: ENA communication layer struct
+ * @proto: The protocol to retrieve.
+ * @fields: bit mask of ena_admin_flow_hash_fields.
+ *
+ * Retrieve the hash control from the device.
+ *
+ * @note: If the caller called ena_com_fill_hash_ctrl but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev,
+ enum ena_admin_flow_hash_proto proto,
+ u16 *fields);
+
+/* ena_com_set_default_hash_ctrl - Set the hash control to a default
+ * configuration.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Fill the ena_dev resources with the default hash control configuration.
+ * To flush the hash control to the device, the caller should call
+ * ena_com_set_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev);
+
+/* ena_com_indirect_table_fill_entry - Fill a single entry in the RSS
+ * indirection table
+ * @ena_dev: ENA communication layer struct.
+ * @entry_idx - indirection table entry.
+ * @entry_value - redirection value
+ *
+ * Fill a single entry of the RSS indirection table in the ena_dev resources.
+ * To flush the indirection table to the device, the caller should call
+ * ena_com_indirect_table_set.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev,
+ u16 entry_idx, u16 entry_value);
+
+/* ena_com_indirect_table_set - Flush the indirection table to the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the indirection hash control to the device.
+ * Prior to this method the caller should call ena_com_indirect_table_fill_entry
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_set(struct ena_com_dev *ena_dev);
+
+/* ena_com_indirect_table_get - Retrieve the indirection table from the device.
+ * @ena_dev: ENA communication layer struct
+ * @ind_tbl: indirection table
+ *
+ * Retrieve the RSS indirection table from the device.
+ *
+ * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl);
+
+/* ena_com_allocate_host_info - Allocate host info resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_allocate_host_info(struct ena_com_dev *ena_dev);
+
+/* ena_com_allocate_debug_area - Allocate debug area.
+ * @ena_dev: ENA communication layer struct
+ * @debug_area_size - debug area size.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev,
+ u32 debug_area_size);
+
+/* ena_com_delete_debug_area - Free the debug area resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free the allocated debug area.
+ */
+void ena_com_delete_debug_area(struct ena_com_dev *ena_dev);
+
+/* ena_com_delete_host_info - Free the host info resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free the allocated host info.
+ */
+void ena_com_delete_host_info(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_host_attributes - Update the device with the host
+ * attributes (debug area and host info) base address.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_host_attributes(struct ena_com_dev *ena_dev);
+
+/* ena_com_create_io_cq - Create io completion queue.
+ * @ena_dev: ENA communication layer struct
+ * @io_cq - io completion queue handler
+ *
+ * Create IO completion queue.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
+ struct ena_com_io_cq *io_cq);
+
+/* ena_com_destroy_io_cq - Destroy io completion queue.
+ * @ena_dev: ENA communication layer struct
+ * @io_cq - io completion queue handler
+ *
+ * Destroy IO completion queue.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
+ struct ena_com_io_cq *io_cq);
+
+/* ena_com_execute_admin_command - Execute admin command
+ * @admin_queue: admin queue.
+ * @cmd: the admin command to execute.
+ * @cmd_size: the command size.
+ * @cmd_completion: command completion return value.
+ * @cmd_comp_size: command completion size.
+ *
+ * Submit an admin command and then wait until the device returns a
+ * completion.
+ * The completion will be copied into cmd_comp.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
+ struct ena_admin_aq_entry *cmd,
+ size_t cmd_size,
+ struct ena_admin_acq_entry *cmd_comp,
+ size_t cmd_comp_size);
+
+/* ena_com_init_interrupt_moderation - Init interrupt moderation
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev);
+
+/* ena_com_interrupt_moderation_supported - Return if interrupt moderation
+ * capability is supported by the device.
+ *
+ * @return - supported or not.
+ */
+bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev);
+
+/* ena_com_update_nonadaptive_moderation_interval_tx - Update the
+ * non-adaptive interval in Tx direction.
+ * @ena_dev: ENA communication layer struct
+ * @tx_coalesce_usecs: Interval in usec.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
+ u32 tx_coalesce_usecs);
+
+/* ena_com_update_nonadaptive_moderation_interval_rx - Update the
+ * non-adaptive interval in Rx direction.
+ * @ena_dev: ENA communication layer struct
+ * @rx_coalesce_usecs: Interval in usec.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
+ u32 rx_coalesce_usecs);
+
+/* ena_com_get_nonadaptive_moderation_interval_tx - Retrieve the
+ * non-adaptive interval in Tx direction.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - interval in usec
+ */
+unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_nonadaptive_moderation_interval_rx - Retrieve the
+ * non-adaptive interval in Rx direction.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - interval in usec
+ */
+unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev);
+
+/* ena_com_config_dev_mode - Configure the placement policy of the device.
+ * @ena_dev: ENA communication layer struct
+ * @llq_features: LLQ feature descriptor, retrieve via
+ * ena_com_get_dev_attr_feat.
+ * @llq_default_config: The default driver LLQ parameter configuration
+ */
+int ena_com_config_dev_mode(struct ena_com_dev *ena_dev,
+ struct ena_admin_feature_llq_desc *llq_features,
+ struct ena_llq_configurations *llq_default_config);
+
+/* ena_com_io_sq_to_ena_dev - Extract ena_com_dev using contained field io_sq.
+ * @io_sq: IO submit queue struct
+ *
+ * @return - ena_com_dev struct extracted from io_sq
+ */
+static inline struct ena_com_dev *ena_com_io_sq_to_ena_dev(struct ena_com_io_sq *io_sq)
+{
+ return container_of(io_sq, struct ena_com_dev, io_sq_queues[io_sq->qid]);
+}
+
+/* ena_com_io_cq_to_ena_dev - Extract ena_com_dev using contained field io_cq.
+ * @io_cq: IO completion queue struct
+ *
+ * @return - ena_com_dev struct extracted from io_cq
+ */
+static inline struct ena_com_dev *ena_com_io_cq_to_ena_dev(struct ena_com_io_cq *io_cq)
+{
+ return container_of(io_cq, struct ena_com_dev, io_cq_queues[io_cq->qid]);
+}
+
+static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev)
+{
+ return ena_dev->adaptive_coalescing;
+}
+
+static inline void ena_com_enable_adaptive_moderation(struct ena_com_dev *ena_dev)
+{
+ ena_dev->adaptive_coalescing = true;
+}
+
+static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_dev)
+{
+ ena_dev->adaptive_coalescing = false;
+}
+
+/* ena_com_get_cap - query whether device supports a capability.
+ * @ena_dev: ENA communication layer struct
+ * @cap_id: enum value representing the capability
+ *
+ * @return - true if capability is supported or false otherwise
+ */
+static inline bool ena_com_get_cap(struct ena_com_dev *ena_dev,
+ enum ena_admin_aq_caps_id cap_id)
+{
+ return !!(ena_dev->capabilities & BIT(cap_id));
+}
+
+/* ena_com_update_intr_reg - Prepare interrupt register
+ * @intr_reg: interrupt register to update.
+ * @rx_delay_interval: Rx interval in usecs
+ * @tx_delay_interval: Tx interval in usecs
+ * @unmask: unmask enable/disable
+ *
+ * Prepare interrupt update register with the supplied parameters.
+ */
+static inline void ena_com_update_intr_reg(struct ena_eth_io_intr_reg *intr_reg,
+ u32 rx_delay_interval,
+ u32 tx_delay_interval,
+ bool unmask)
+{
+ intr_reg->intr_control = 0;
+ intr_reg->intr_control |= rx_delay_interval &
+ ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK;
+
+ intr_reg->intr_control |=
+ (tx_delay_interval << ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT)
+ & ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK;
+
+ if (unmask)
+ intr_reg->intr_control |= ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK;
+}
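+
+/* Illustrative pairing with the IO path (hypothetical caller; rx_usecs and
+ * tx_usecs stand for the desired intervals): build the register image here,
+ * then write it out through ena_com_unmask_intr() from ena_eth_com.h:
+ *
+ *	struct ena_eth_io_intr_reg intr_reg;
+ *
+ *	ena_com_update_intr_reg(&intr_reg, rx_usecs, tx_usecs, true);
+ *	ena_com_unmask_intr(io_cq, &intr_reg);
+ */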
+
+static inline u8 *ena_com_get_next_bounce_buffer(struct ena_com_io_bounce_buffer_control *bounce_buf_ctrl)
+{
+ u16 size, buffers_num;
+ u8 *buf;
+
+ size = bounce_buf_ctrl->buffer_size;
+ buffers_num = bounce_buf_ctrl->buffers_num;
+
+ buf = bounce_buf_ctrl->base_buffer +
+ (bounce_buf_ctrl->next_to_use++ & (buffers_num - 1)) * size;
+
+ prefetchw(bounce_buf_ctrl->base_buffer +
+ (bounce_buf_ctrl->next_to_use & (buffers_num - 1)) * size);
+
+ return buf;
+}
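+
+/* Because buffers_num is a power of 2, the "& (buffers_num - 1)" above is a
+ * cheap modulo. E.g. with buffers_num = 8 and next_to_use = 9, the returned
+ * buffer index is 9 & 7 = 1, and the following buffer (index 2) is
+ * prefetched for the next call.
+ */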
+
+#endif /* !(ENA_COM) */
diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
new file mode 100644
index 0000000000..e210c8a81f
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+#ifndef _ENA_COMMON_H_
+#define _ENA_COMMON_H_
+
+#define ENA_COMMON_SPEC_VERSION_MAJOR 2
+#define ENA_COMMON_SPEC_VERSION_MINOR 0
+
+/* ENA operates with 48-bit memory addresses. ena_mem_addr_t */
+struct ena_common_mem_addr {
+ u32 mem_addr_low;
+
+ u16 mem_addr_high;
+
+ /* MBZ */
+ u16 reserved16;
+};
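+
+/* A sketch of how a 64-bit dma_addr_t maps onto this split layout (this
+ * mirrors what ena_com_mem_addr_set() in ena_com.c does, assuming the
+ * address fits within the device's supported width):
+ *
+ *	ena_addr->mem_addr_low  = lower_32_bits(addr);
+ *	ena_addr->mem_addr_high = (u16)upper_32_bits(addr);
+ */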
+
+#endif /* _ENA_COMMON_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
new file mode 100644
index 0000000000..f9f886289b
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -0,0 +1,646 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "ena_eth_com.h"
+
+static struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
+ struct ena_com_io_cq *io_cq)
+{
+ struct ena_eth_io_rx_cdesc_base *cdesc;
+ u16 expected_phase, head_masked;
+ u16 desc_phase;
+
+ head_masked = io_cq->head & (io_cq->q_depth - 1);
+ expected_phase = io_cq->phase;
+
+ cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr
+ + (head_masked * io_cq->cdesc_entry_size_in_bytes));
+
+ desc_phase = (READ_ONCE(cdesc->status) &
+ ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT;
+
+ if (desc_phase != expected_phase)
+ return NULL;
+
+ /* Make sure we read the rest of the descriptor after the phase bit
+ * has been read
+ */
+ dma_rmb();
+
+ return cdesc;
+}
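+
+/* Note on the phase protocol above: the device stamps each completion with
+ * the current generation ("phase") bit, and the driver flips its expected
+ * phase on every queue wrap-around. A descriptor whose phase doesn't match
+ * is stale data from the previous lap, so NULL is returned without touching
+ * the head.
+ */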
+
+static void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq)
+{
+ u16 tail_masked;
+ u32 offset;
+
+ tail_masked = io_sq->tail & (io_sq->q_depth - 1);
+
+ offset = tail_masked * io_sq->desc_entry_size;
+
+ return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset);
+}
+
+static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq,
+ u8 *bounce_buffer)
+{
+ struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+
+ u16 dst_tail_mask;
+ u32 dst_offset;
+
+ dst_tail_mask = io_sq->tail & (io_sq->q_depth - 1);
+ dst_offset = dst_tail_mask * llq_info->desc_list_entry_size;
+
+ if (is_llq_max_tx_burst_exists(io_sq)) {
+ if (unlikely(!io_sq->entries_in_tx_burst_left)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Error: trying to send more packets than tx burst allows\n");
+ return -ENOSPC;
+ }
+
+ io_sq->entries_in_tx_burst_left--;
+ netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Decreasing entries_in_tx_burst_left of queue %d to %d\n",
+ io_sq->qid, io_sq->entries_in_tx_burst_left);
+ }
+
+ /* Make sure everything was written into the bounce buffer before
+ * writing the bounce buffer to the device
+ */
+ wmb();
+
+ /* The line is completed. Copy it to dev */
+ __iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset,
+ bounce_buffer, (llq_info->desc_list_entry_size) / 8);
+
+ io_sq->tail++;
+
+ /* Switch phase bit in case of wrap around */
+ if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
+ io_sq->phase ^= 1;
+
+ return 0;
+}
+
+static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq,
+ u8 *header_src,
+ u16 header_len)
+{
+ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+ struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+ u8 *bounce_buffer = pkt_ctrl->curr_bounce_buf;
+ u16 header_offset;
+
+ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST))
+ return 0;
+
+ header_offset =
+ llq_info->descs_num_before_header * io_sq->desc_entry_size;
+
+ if (unlikely((header_offset + header_len) >
+ llq_info->desc_list_entry_size)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Trying to write header larger than llq entry can accommodate\n");
+ return -EFAULT;
+ }
+
+ if (unlikely(!bounce_buffer)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Bounce buffer is NULL\n");
+ return -EFAULT;
+ }
+
+ memcpy(bounce_buffer + header_offset, header_src, header_len);
+
+ return 0;
+}
+
+static void *get_sq_desc_llq(struct ena_com_io_sq *io_sq)
+{
+ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+ u8 *bounce_buffer;
+ void *sq_desc;
+
+ bounce_buffer = pkt_ctrl->curr_bounce_buf;
+
+ if (unlikely(!bounce_buffer)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Bounce buffer is NULL\n");
+ return NULL;
+ }
+
+ sq_desc = bounce_buffer + pkt_ctrl->idx * io_sq->desc_entry_size;
+ pkt_ctrl->idx++;
+ pkt_ctrl->descs_left_in_line--;
+
+ return sq_desc;
+}
+
+static int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq)
+{
+ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+ struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+ int rc;
+
+ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST))
+ return 0;
+
+ /* bounce buffer was used, so write it and get a new one */
+ if (likely(pkt_ctrl->idx)) {
+ rc = ena_com_write_bounce_buffer_to_dev(io_sq,
+ pkt_ctrl->curr_bounce_buf);
+ if (unlikely(rc)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Failed to write bounce buffer to device\n");
+ return rc;
+ }
+
+ pkt_ctrl->curr_bounce_buf =
+ ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
+ memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+ 0x0, llq_info->desc_list_entry_size);
+ }
+
+ pkt_ctrl->idx = 0;
+ pkt_ctrl->descs_left_in_line = llq_info->descs_num_before_header;
+ return 0;
+}
+
+static void *get_sq_desc(struct ena_com_io_sq *io_sq)
+{
+ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ return get_sq_desc_llq(io_sq);
+
+ return get_sq_desc_regular_queue(io_sq);
+}
+
+static int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq)
+{
+ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+ struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+ int rc;
+
+ if (!pkt_ctrl->descs_left_in_line) {
+ rc = ena_com_write_bounce_buffer_to_dev(io_sq,
+ pkt_ctrl->curr_bounce_buf);
+ if (unlikely(rc)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Failed to write bounce buffer to device\n");
+ return rc;
+ }
+
+ pkt_ctrl->curr_bounce_buf =
+ ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
+ memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+ 0x0, llq_info->desc_list_entry_size);
+
+ pkt_ctrl->idx = 0;
+ if (unlikely(llq_info->desc_stride_ctrl == ENA_ADMIN_SINGLE_DESC_PER_ENTRY))
+ pkt_ctrl->descs_left_in_line = 1;
+ else
+ pkt_ctrl->descs_left_in_line =
+ llq_info->desc_list_entry_size / io_sq->desc_entry_size;
+ }
+
+ return 0;
+}
+
+static int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq)
+{
+ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ return ena_com_sq_update_llq_tail(io_sq);
+
+ io_sq->tail++;
+
+ /* Switch phase bit in case of wrap around */
+ if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
+ io_sq->phase ^= 1;
+
+ return 0;
+}
+
+static struct ena_eth_io_rx_cdesc_base *
+ ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx)
+{
+ idx &= (io_cq->q_depth - 1);
+ return (struct ena_eth_io_rx_cdesc_base *)
+ ((uintptr_t)io_cq->cdesc_addr.virt_addr +
+ idx * io_cq->cdesc_entry_size_in_bytes);
+}
+
+static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
+ u16 *first_cdesc_idx)
+{
+ struct ena_eth_io_rx_cdesc_base *cdesc;
+ u16 count = 0, head_masked;
+ u32 last = 0;
+
+ do {
+ cdesc = ena_com_get_next_rx_cdesc(io_cq);
+ if (!cdesc)
+ break;
+
+ ena_com_cq_inc_head(io_cq);
+ count++;
+ last = (READ_ONCE(cdesc->status) &
+ ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT;
+ } while (!last);
+
+ if (last) {
+ *first_cdesc_idx = io_cq->cur_rx_pkt_cdesc_start_idx;
+ count += io_cq->cur_rx_pkt_cdesc_count;
+
+ head_masked = io_cq->head & (io_cq->q_depth - 1);
+
+ io_cq->cur_rx_pkt_cdesc_count = 0;
+ io_cq->cur_rx_pkt_cdesc_start_idx = head_masked;
+
+ netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
+ "ENA q_id: %d packets were completed. first desc idx %u descs# %d\n",
+ io_cq->qid, *first_cdesc_idx, count);
+ } else {
+ io_cq->cur_rx_pkt_cdesc_count += count;
+ count = 0;
+ }
+
+ return count;
+}
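+
+/* If the "last" bit hasn't been seen yet, the descriptors observed so far
+ * are only accumulated in cur_rx_pkt_cdesc_count and 0 is returned; a later
+ * call resumes the same packet from cur_rx_pkt_cdesc_start_idx once the
+ * device completes it.
+ */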
+
+static int ena_com_create_meta(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_meta *ena_meta)
+{
+ struct ena_eth_io_tx_meta_desc *meta_desc = NULL;
+
+ meta_desc = get_sq_desc(io_sq);
+ if (unlikely(!meta_desc))
+ return -EFAULT;
+
+ memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc));
+
+ meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_DESC_MASK;
+
+ meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK;
+
+ /* bits 0-9 of the mss */
+ meta_desc->word2 |= ((u32)ena_meta->mss <<
+ ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT) &
+ ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK;
+ /* bits 10-13 of the mss */
+ meta_desc->len_ctrl |= ((ena_meta->mss >> 10) <<
+ ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT) &
+ ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK;
+
+ /* Extended meta desc */
+ meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK;
+ meta_desc->len_ctrl |= ((u32)io_sq->phase <<
+ ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) &
+ ENA_ETH_IO_TX_META_DESC_PHASE_MASK;
+
+ meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_FIRST_MASK;
+ meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+
+ meta_desc->word2 |= ena_meta->l3_hdr_len &
+ ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK;
+ meta_desc->word2 |= (ena_meta->l3_hdr_offset <<
+ ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT) &
+ ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK;
+
+ meta_desc->word2 |= ((u32)ena_meta->l4_hdr_len <<
+ ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) &
+ ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK;
+
+ return ena_com_sq_update_tail(io_sq);
+}
+
+static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_ctx *ena_tx_ctx,
+ bool *have_meta)
+{
+ struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
+
+ /* When disable meta caching is set, don't bother to save the meta and
+ * compare it to the stored version, just create the meta
+ */
+ if (io_sq->disable_meta_caching) {
+ *have_meta = true;
+ return ena_com_create_meta(io_sq, ena_meta);
+ }
+
+ if (ena_com_meta_desc_changed(io_sq, ena_tx_ctx)) {
+ *have_meta = true;
+ /* Cache the meta desc */
+ memcpy(&io_sq->cached_tx_meta, ena_meta,
+ sizeof(struct ena_com_tx_meta));
+ return ena_com_create_meta(io_sq, ena_meta);
+ }
+
+ *have_meta = false;
+ return 0;
+}
+
+static void ena_com_rx_set_flags(struct ena_com_io_cq *io_cq,
+ struct ena_com_rx_ctx *ena_rx_ctx,
+ struct ena_eth_io_rx_cdesc_base *cdesc)
+{
+ ena_rx_ctx->l3_proto = cdesc->status &
+ ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK;
+ ena_rx_ctx->l4_proto =
+ (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT;
+ ena_rx_ctx->l3_csum_err =
+ !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT);
+ ena_rx_ctx->l4_csum_err =
+ !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT);
+ ena_rx_ctx->l4_csum_checked =
+ !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT);
+ ena_rx_ctx->hash = cdesc->hash;
+ ena_rx_ctx->frag =
+ (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT;
+
+ netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
+ "l3_proto %d l4_proto %d l3_csum_err %d l4_csum_err %d hash %d frag %d cdesc_status %x\n",
+ ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto,
+ ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err,
+ ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status);
+}
+
+/*****************************************************************************/
+/***************************** API **********************************/
+/*****************************************************************************/
+
+int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_ctx *ena_tx_ctx,
+ int *nb_hw_desc)
+{
+ struct ena_eth_io_tx_desc *desc = NULL;
+ struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs;
+ void *buffer_to_push = ena_tx_ctx->push_header;
+ u16 header_len = ena_tx_ctx->header_len;
+ u16 num_bufs = ena_tx_ctx->num_bufs;
+ u16 start_tail = io_sq->tail;
+ int i, rc;
+ bool have_meta;
+ u64 addr_hi;
+
+ WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type");
+
+ /* num_bufs +1 for potential meta desc */
+ if (unlikely(!ena_com_sq_have_enough_space(io_sq, num_bufs + 1))) {
+ netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Not enough space in the tx queue\n");
+ return -ENOMEM;
+ }
+
+ if (unlikely(header_len > io_sq->tx_max_header_size)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Header size is too large %d max header: %d\n",
+ header_len, io_sq->tx_max_header_size);
+ return -EINVAL;
+ }
+
+ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV &&
+ !buffer_to_push)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Push header wasn't provided in LLQ mode\n");
+ return -EINVAL;
+ }
+
+ rc = ena_com_write_header_to_bounce(io_sq, buffer_to_push, header_len);
+ if (unlikely(rc))
+ return rc;
+
+ rc = ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx, &have_meta);
+ if (unlikely(rc)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Failed to create and store tx meta desc\n");
+ return rc;
+ }
+
+ /* If the caller doesn't want to send packets */
+ if (unlikely(!num_bufs && !header_len)) {
+ rc = ena_com_close_bounce_buffer(io_sq);
+ if (rc)
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Failed to write buffers to LLQ\n");
+ *nb_hw_desc = io_sq->tail - start_tail;
+ return rc;
+ }
+
+ desc = get_sq_desc(io_sq);
+ if (unlikely(!desc))
+ return -EFAULT;
+ memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
+
+ /* Set first desc when we don't have meta descriptor */
+ if (!have_meta)
+ desc->len_ctrl |= ENA_ETH_IO_TX_DESC_FIRST_MASK;
+
+ desc->buff_addr_hi_hdr_sz |= ((u32)header_len <<
+ ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT) &
+ ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK;
+ desc->len_ctrl |= ((u32)io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) &
+ ENA_ETH_IO_TX_DESC_PHASE_MASK;
+
+ desc->len_ctrl |= ENA_ETH_IO_TX_DESC_COMP_REQ_MASK;
+
+ /* Bits 0-9 */
+ desc->meta_ctrl |= ((u32)ena_tx_ctx->req_id <<
+ ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT) &
+ ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK;
+
+ desc->meta_ctrl |= (ena_tx_ctx->df <<
+ ENA_ETH_IO_TX_DESC_DF_SHIFT) &
+ ENA_ETH_IO_TX_DESC_DF_MASK;
+
+ /* Bits 10-15 */
+ desc->len_ctrl |= ((ena_tx_ctx->req_id >> 10) <<
+ ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT) &
+ ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK;
+
+ if (ena_tx_ctx->meta_valid) {
+ desc->meta_ctrl |= (ena_tx_ctx->tso_enable <<
+ ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT) &
+ ENA_ETH_IO_TX_DESC_TSO_EN_MASK;
+ desc->meta_ctrl |= ena_tx_ctx->l3_proto &
+ ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK;
+ desc->meta_ctrl |= (ena_tx_ctx->l4_proto <<
+ ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT) &
+ ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK;
+ desc->meta_ctrl |= (ena_tx_ctx->l3_csum_enable <<
+ ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT) &
+ ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK;
+ desc->meta_ctrl |= (ena_tx_ctx->l4_csum_enable <<
+ ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT) &
+ ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK;
+ desc->meta_ctrl |= (ena_tx_ctx->l4_csum_partial <<
+ ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT) &
+ ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK;
+ }
+
+ for (i = 0; i < num_bufs; i++) {
+		/* The first desc shares the same desc as the header */
+ if (likely(i != 0)) {
+ rc = ena_com_sq_update_tail(io_sq);
+ if (unlikely(rc)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Failed to update sq tail\n");
+ return rc;
+ }
+
+ desc = get_sq_desc(io_sq);
+ if (unlikely(!desc))
+ return -EFAULT;
+
+ memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
+
+ desc->len_ctrl |= ((u32)io_sq->phase <<
+ ENA_ETH_IO_TX_DESC_PHASE_SHIFT) &
+ ENA_ETH_IO_TX_DESC_PHASE_MASK;
+ }
+
+ desc->len_ctrl |= ena_bufs->len &
+ ENA_ETH_IO_TX_DESC_LENGTH_MASK;
+
+ addr_hi = ((ena_bufs->paddr &
+ GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
+
+ desc->buff_addr_lo = (u32)ena_bufs->paddr;
+ desc->buff_addr_hi_hdr_sz |= addr_hi &
+ ENA_ETH_IO_TX_DESC_ADDR_HI_MASK;
+ ena_bufs++;
+ }
+
+ /* set the last desc indicator */
+ desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK;
+
+ rc = ena_com_sq_update_tail(io_sq);
+ if (unlikely(rc)) {
+ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Failed to update sq tail of the last descriptor\n");
+ return rc;
+ }
+
+ rc = ena_com_close_bounce_buffer(io_sq);
+
+ *nb_hw_desc = io_sq->tail - start_tail;
+ return rc;
+}
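+
+/* A minimal single-buffer TX submission sketch (hypothetical caller;
+ * dma_addr, buf_len and req_id stand for caller-provided values, and the
+ * doorbell helper is assumed from ena_eth_com.h):
+ *
+ *	struct ena_com_tx_ctx tx_ctx = {};
+ *	struct ena_com_buf buf;
+ *	int nb_hw_desc, rc;
+ *
+ *	buf.paddr = dma_addr;
+ *	buf.len = buf_len;
+ *	tx_ctx.ena_bufs = &buf;
+ *	tx_ctx.num_bufs = 1;
+ *	tx_ctx.req_id = req_id;
+ *
+ *	rc = ena_com_prepare_tx(io_sq, &tx_ctx, &nb_hw_desc);
+ *	if (!rc)
+ *		ena_com_write_sq_doorbell(io_sq);
+ */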
+
+int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
+ struct ena_com_io_sq *io_sq,
+ struct ena_com_rx_ctx *ena_rx_ctx)
+{
+ struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0];
+ struct ena_eth_io_rx_cdesc_base *cdesc = NULL;
+ u16 q_depth = io_cq->q_depth;
+ u16 cdesc_idx = 0;
+ u16 nb_hw_desc;
+ u16 i = 0;
+
+ WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
+
+ nb_hw_desc = ena_com_cdesc_rx_pkt_get(io_cq, &cdesc_idx);
+ if (nb_hw_desc == 0) {
+ ena_rx_ctx->descs = nb_hw_desc;
+ return 0;
+ }
+
+ netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
+ "Fetch rx packet: queue %d completed desc: %d\n", io_cq->qid,
+ nb_hw_desc);
+
+ if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) {
+ netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
+ "Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc,
+ ena_rx_ctx->max_bufs);
+ return -ENOSPC;
+ }
+
+ cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx);
+ ena_rx_ctx->pkt_offset = cdesc->offset;
+
+ do {
+ ena_buf[i].len = cdesc->length;
+ ena_buf[i].req_id = cdesc->req_id;
+ if (unlikely(ena_buf[i].req_id >= q_depth))
+ return -EIO;
+
+ if (++i >= nb_hw_desc)
+ break;
+
+ cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i);
+
+ } while (1);
+
+ /* Update SQ head ptr */
+ io_sq->next_to_comp += nb_hw_desc;
+
+ netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
+ "[%s][QID#%d] Updating SQ head to: %d\n", __func__,
+ io_sq->qid, io_sq->next_to_comp);
+
+ /* Get rx flags from the last pkt */
+ ena_com_rx_set_flags(io_cq, ena_rx_ctx, cdesc);
+
+ ena_rx_ctx->descs = nb_hw_desc;
+
+ return 0;
+}
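+
+/* A minimal RX polling sketch (hypothetical caller; EXAMPLE_MAX_BUFS is an
+ * assumed per-packet buffer limit, and skb construction/refill are elided):
+ *
+ *	struct ena_com_rx_buf_info bufs[EXAMPLE_MAX_BUFS];
+ *	struct ena_com_rx_ctx rx_ctx = {
+ *		.ena_bufs = bufs,
+ *		.max_bufs = EXAMPLE_MAX_BUFS,
+ *	};
+ *
+ *	while (!ena_com_rx_pkt(io_cq, io_sq, &rx_ctx) && rx_ctx.descs) {
+ *		... consume rx_ctx.descs completed buffers ...
+ *	}
+ */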
+
+int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
+ struct ena_com_buf *ena_buf,
+ u16 req_id)
+{
+ struct ena_eth_io_rx_desc *desc;
+
+ WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
+
+ if (unlikely(!ena_com_sq_have_enough_space(io_sq, 1)))
+ return -ENOSPC;
+
+ desc = get_sq_desc(io_sq);
+ if (unlikely(!desc))
+ return -EFAULT;
+
+ memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc));
+
+ desc->length = ena_buf->len;
+
+ desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK |
+ ENA_ETH_IO_RX_DESC_LAST_MASK |
+ ENA_ETH_IO_RX_DESC_COMP_REQ_MASK |
+ (io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK);
+
+ desc->req_id = req_id;
+
+ netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "[%s] Adding single RX desc, Queue: %u, req_id: %u\n",
+ __func__, io_sq->qid, req_id);
+
+ desc->buff_addr_lo = (u32)ena_buf->paddr;
+ desc->buff_addr_hi =
+ ((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
+
+ return ena_com_sq_update_tail(io_sq);
+}
+
+bool ena_com_cq_empty(struct ena_com_io_cq *io_cq)
+{
+ struct ena_eth_io_rx_cdesc_base *cdesc;
+
+ cdesc = ena_com_get_next_rx_cdesc(io_cq);
+	return !cdesc;
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
new file mode 100644
index 0000000000..372b259279
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef ENA_ETH_COM_H_
+#define ENA_ETH_COM_H_
+
+#include "ena_com.h"
+
+/* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */
+#define ENA_COMP_HEAD_THRESH 4
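+/* e.g., with a 1024-entry CQ the completion head doorbell is written roughly
+ * once per 256 processed completions (see ena_com_update_dev_comp_head())
+ */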
+/* we allow 2 DMA descriptors per LLQ entry */
+#define ENA_LLQ_ENTRY_DESC_CHUNK_SIZE (2 * sizeof(struct ena_eth_io_tx_desc))
+#define ENA_LLQ_HEADER (128UL - ENA_LLQ_ENTRY_DESC_CHUNK_SIZE)
+#define ENA_LLQ_LARGE_HEADER (256UL - ENA_LLQ_ENTRY_DESC_CHUNK_SIZE)
+
+struct ena_com_tx_ctx {
+ struct ena_com_tx_meta ena_meta;
+ struct ena_com_buf *ena_bufs;
+ /* For LLQ, header buffer - pushed to the device mem space */
+ void *push_header;
+
+ enum ena_eth_io_l3_proto_index l3_proto;
+ enum ena_eth_io_l4_proto_index l4_proto;
+ u16 num_bufs;
+ u16 req_id;
+	/* For a regular queue, indicates the size of the header.
+	 * For LLQ, indicates the size of the pushed buffer.
+	 */
+ u16 header_len;
+
+ u8 meta_valid;
+ u8 tso_enable;
+ u8 l3_csum_enable;
+ u8 l4_csum_enable;
+ u8 l4_csum_partial;
+ u8 df; /* Don't fragment */
+};
+
+struct ena_com_rx_ctx {
+ struct ena_com_rx_buf_info *ena_bufs;
+ enum ena_eth_io_l3_proto_index l3_proto;
+ enum ena_eth_io_l4_proto_index l4_proto;
+ bool l3_csum_err;
+ bool l4_csum_err;
+ u8 l4_csum_checked;
+ /* fragmented packet */
+ bool frag;
+ u32 hash;
+ u16 descs;
+ int max_bufs;
+ u8 pkt_offset;
+};
+
+int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_ctx *ena_tx_ctx,
+ int *nb_hw_desc);
+
+int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
+ struct ena_com_io_sq *io_sq,
+ struct ena_com_rx_ctx *ena_rx_ctx);
+
+int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
+ struct ena_com_buf *ena_buf,
+ u16 req_id);
+
+bool ena_com_cq_empty(struct ena_com_io_cq *io_cq);
+
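+/* Re-enable the IO interrupt by writing the combined rx/tx delay and unmask
+ * word to the device; the register layout is described by
+ * struct ena_eth_io_intr_reg in ena_eth_io_defs.h.
+ */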
+static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq,
+ struct ena_eth_io_intr_reg *intr_reg)
+{
+ writel(intr_reg->intr_control, io_cq->unmask_reg);
+}
+
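+/* One SQ entry is always kept unused so that a completely full queue (tail
+ * wrapped all the way around to next_to_comp) can be told apart from an
+ * empty one.
+ */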
+static inline int ena_com_free_q_entries(struct ena_com_io_sq *io_sq)
+{
+ u16 tail, next_to_comp, cnt;
+
+ next_to_comp = io_sq->next_to_comp;
+ tail = io_sq->tail;
+ cnt = tail - next_to_comp;
+
+ return io_sq->q_depth - 1 - cnt;
+}
+
+/* Check if the submission queue has enough space to hold required_buffers */
+static inline bool ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq,
+ u16 required_buffers)
+{
+ int temp;
+
+ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+ return ena_com_free_q_entries(io_sq) >= required_buffers;
+
+	/* This calculation doesn't need to be 100% accurate. To reduce the
+	 * calculation overhead, just subtract 2 lines from the free descs
+	 * (one for the header line and one to compensate for the division
+	 * rounding down).
+	 */
+ temp = required_buffers / io_sq->llq_info.descs_per_entry + 2;
+
+ return ena_com_free_q_entries(io_sq) > temp;
+}
+
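+/* The driver caches the last TX metadata it sent; a fresh meta descriptor
+ * only has to be queued when the new packet's metadata differs from the
+ * cached copy.
+ */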
+static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_ctx *ena_tx_ctx)
+{
+ if (!ena_tx_ctx->meta_valid)
+ return false;
+
+ return !!memcmp(&io_sq->cached_tx_meta,
+ &ena_tx_ctx->ena_meta,
+ sizeof(struct ena_com_tx_meta));
+}
+
+static inline bool is_llq_max_tx_burst_exists(struct ena_com_io_sq *io_sq)
+{
+ return (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) &&
+ io_sq->llq_info.max_entries_in_tx_burst > 0;
+}
+
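+/* In LLQ mode the device bounds how many SQ entries may be pushed between
+ * doorbells. Returns true when the next packet would exceed the remaining
+ * burst budget, in which case the caller must ring the doorbell first.
+ */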
+static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_ctx *ena_tx_ctx)
+{
+ struct ena_com_llq_info *llq_info;
+ int descs_after_first_entry;
+ int num_entries_needed = 1;
+ u16 num_descs;
+
+ if (!is_llq_max_tx_burst_exists(io_sq))
+ return false;
+
+ llq_info = &io_sq->llq_info;
+ num_descs = ena_tx_ctx->num_bufs;
+
+ if (llq_info->disable_meta_caching ||
+ unlikely(ena_com_meta_desc_changed(io_sq, ena_tx_ctx)))
+ ++num_descs;
+
+ if (num_descs > llq_info->descs_num_before_header) {
+ descs_after_first_entry = num_descs - llq_info->descs_num_before_header;
+ num_entries_needed += DIV_ROUND_UP(descs_after_first_entry,
+ llq_info->descs_per_entry);
+ }
+
+ netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Queue: %d num_descs: %d num_entries_needed: %d\n",
+ io_sq->qid, num_descs, num_entries_needed);
+
+ return num_entries_needed > io_sq->entries_in_tx_burst_left;
+}
+
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+{
+ u16 max_entries_in_tx_burst = io_sq->llq_info.max_entries_in_tx_burst;
+ u16 tail = io_sq->tail;
+
+ netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Write submission queue doorbell for queue: %d tail: %d\n",
+ io_sq->qid, tail);
+
+ writel(tail, io_sq->db_addr);
+
+ if (is_llq_max_tx_burst_exists(io_sq)) {
+ netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+ "Reset available entries in tx burst for queue %d to %d\n",
+ io_sq->qid, max_entries_in_tx_burst);
+ io_sq->entries_in_tx_burst_left = max_entries_in_tx_burst;
+ }
+
+ return 0;
+}
+
+static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq)
+{
+ u16 unreported_comp, head;
+ bool need_update;
+
+ if (unlikely(io_cq->cq_head_db_reg)) {
+ head = io_cq->head;
+ unreported_comp = head - io_cq->last_head_update;
+ need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH);
+
+ if (unlikely(need_update)) {
+ netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
+ "Write completion queue doorbell for queue %d: head: %d\n",
+ io_cq->qid, head);
+ writel(head, io_cq->cq_head_db_reg);
+ io_cq->last_head_update = head;
+ }
+ }
+
+ return 0;
+}
+
+static inline void ena_com_update_numa_node(struct ena_com_io_cq *io_cq,
+ u8 numa_node)
+{
+ struct ena_eth_io_numa_node_cfg_reg numa_cfg;
+
+ if (!io_cq->numa_node_cfg_reg)
+ return;
+
+ numa_cfg.numa_cfg = (numa_node & ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK)
+ | ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK;
+
+ writel(numa_cfg.numa_cfg, io_cq->numa_node_cfg_reg);
+}
+
+static inline void ena_com_comp_ack(struct ena_com_io_sq *io_sq, u16 elem)
+{
+ io_sq->next_to_comp += elem;
+}
+
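+/* The CQ phase bit flips on every queue wraparound; completion descriptors
+ * whose phase doesn't match the expected one are leftovers from the previous
+ * pass and must not be consumed (see ena_com_tx_comp_req_id_get() below).
+ */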
+static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq)
+{
+ io_cq->head++;
+
+ /* Switch phase bit in case of wrap around */
+ if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0))
+ io_cq->phase ^= 1;
+}
+
+static inline int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq,
+ u16 *req_id)
+{
+ u8 expected_phase, cdesc_phase;
+ struct ena_eth_io_tx_cdesc *cdesc;
+ u16 masked_head;
+
+ masked_head = io_cq->head & (io_cq->q_depth - 1);
+ expected_phase = io_cq->phase;
+
+ cdesc = (struct ena_eth_io_tx_cdesc *)
+ ((uintptr_t)io_cq->cdesc_addr.virt_addr +
+ (masked_head * io_cq->cdesc_entry_size_in_bytes));
+
+	/* When the current completion descriptor phase isn't the same as the
+	 * expected one, it means that the device hasn't updated
+	 * this completion yet.
+	 */
+ cdesc_phase = READ_ONCE(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
+ if (cdesc_phase != expected_phase)
+ return -EAGAIN;
+
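+	/* Make sure the descriptor body (req_id below) is read only after the
+	 * phase bit has been validated.
+	 */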
+ dma_rmb();
+
+ *req_id = READ_ONCE(cdesc->req_id);
+ if (unlikely(*req_id >= io_cq->q_depth)) {
+ netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device,
+ "Invalid req id %d\n", cdesc->req_id);
+ return -EINVAL;
+ }
+
+ ena_com_cq_inc_head(io_cq);
+
+ return 0;
+}
+
+#endif /* ENA_ETH_COM_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
new file mode 100644
index 0000000000..332ac0d28a
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+#ifndef _ENA_ETH_IO_H_
+#define _ENA_ETH_IO_H_
+
+enum ena_eth_io_l3_proto_index {
+ ENA_ETH_IO_L3_PROTO_UNKNOWN = 0,
+ ENA_ETH_IO_L3_PROTO_IPV4 = 8,
+ ENA_ETH_IO_L3_PROTO_IPV6 = 11,
+ ENA_ETH_IO_L3_PROTO_FCOE = 21,
+ ENA_ETH_IO_L3_PROTO_ROCE = 22,
+};
+
+enum ena_eth_io_l4_proto_index {
+ ENA_ETH_IO_L4_PROTO_UNKNOWN = 0,
+ ENA_ETH_IO_L4_PROTO_TCP = 12,
+ ENA_ETH_IO_L4_PROTO_UDP = 13,
+ ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE = 23,
+};
+
+struct ena_eth_io_tx_desc {
+	/* 15:0 : length - Buffer length in bytes; must
+	 * include any packet trailers that the ENA is supposed
+	 * to update, like End-to-End CRC, Authentication GMAC,
+	 * etc. This length must not include the
+	 * 'Push_Buffer' length, nor the 4 bytes
+	 * appended at the end for the 802.3 Ethernet FCS
+ * 21:16 : req_id_hi - Request ID[15:10]
+ * 22 : reserved22 - MBZ
+ * 23 : meta_desc - MBZ
+ * 24 : phase
+ * 25 : reserved1 - MBZ
+ * 26 : first - Indicates first descriptor in
+ * transaction
+ * 27 : last - Indicates last descriptor in
+ * transaction
+ * 28 : comp_req - Indicates whether completion
+ * should be posted, after packet is transmitted.
+ * Valid only for first descriptor
+ * 30:29 : reserved29 - MBZ
+ * 31 : reserved31 - MBZ
+ */
+ u32 len_ctrl;
+
+	/* 3:0 : l3_proto_idx - L3 protocol. This field is
+	 * required when l3_csum_en, l3_csum or tso_en is set.
+	 * 4 : DF - IPv4 DF; must be 0 if the packet is IPv4 and
+	 * the DF flag of the IPv4 header is 0, otherwise it must
+	 * be set to 1
+	 * 6:5 : reserved5
+	 * 7 : tso_en - Enable TSO, for TCP only.
+	 * 12:8 : l4_proto_idx - L4 protocol. This field needs
+	 * to be set when l4_csum_en or tso_en is set.
+	 * 13 : l3_csum_en - enable IPv4 header checksum.
+	 * 14 : l4_csum_en - enable TCP/UDP checksum.
+ * 15 : ethernet_fcs_dis - when set, the controller
+ * will not append the 802.3 Ethernet Frame Check
+ * Sequence to the packet
+ * 16 : reserved16
+	 * 17 : l4_csum_partial - L4 partial checksum. When
+	 * set to 0, the ENA calculates the L4 checksum,
+	 * where the Destination Address required for the
+	 * TCP/UDP pseudo-header is taken from the actual
+	 * packet L3 header. When set to 1, the ENA doesn't
+	 * calculate the pseudo-header sum; the checksum
+	 * field of the L4 header is used instead. When
+	 * TSO is enabled, the checksum of the pseudo-header
+	 * must not include the TCP length field. L4 partial
+	 * checksum should be used for IPv6 packets that
+	 * contain Routing Headers.
+ * 20:18 : reserved18 - MBZ
+ * 21 : reserved21 - MBZ
+ * 31:22 : req_id_lo - Request ID[9:0]
+ */
+ u32 meta_ctrl;
+
+ u32 buff_addr_lo;
+
+ /* address high and header size
+ * 15:0 : addr_hi - Buffer Pointer[47:32]
+ * 23:16 : reserved16_w2
+	 * 31:24 : header_length - Header length. For Low
+	 * Latency Queues, this field indicates the number
+	 * of bytes written to the headers' memory. For
+	 * normal queues, if the packet is TCP or UDP and longer
+	 * than max_header_size, then this field should be
+	 * set to the sum of the L4 header offset and the L4 header
+	 * size (without options); otherwise, this field
+	 * should be set to 0. For both modes, this field
+	 * must not exceed the max_header_size.
+ * max_header_size value is reported by the Max
+ * Queues Feature descriptor
+ */
+ u32 buff_addr_hi_hdr_sz;
+};
+
+struct ena_eth_io_tx_meta_desc {
+ /* 9:0 : req_id_lo - Request ID[9:0]
+ * 11:10 : reserved10 - MBZ
+ * 12 : reserved12 - MBZ
+ * 13 : reserved13 - MBZ
+	 * 14 : ext_valid - if set, the offset fields in Word2
+	 * are valid, as are MSS High in Word 0 and bits [31:24]
+	 * in Word 3
+ * 15 : reserved15
+ * 19:16 : mss_hi
+ * 20 : eth_meta_type - 0: Tx Metadata Descriptor, 1:
+ * Extended Metadata Descriptor
+ * 21 : meta_store - Store extended metadata in queue
+ * cache
+ * 22 : reserved22 - MBZ
+ * 23 : meta_desc - MBO
+ * 24 : phase
+ * 25 : reserved25 - MBZ
+ * 26 : first - Indicates first descriptor in
+ * transaction
+ * 27 : last - Indicates last descriptor in
+ * transaction
+ * 28 : comp_req - Indicates whether completion
+ * should be posted, after packet is transmitted.
+ * Valid only for first descriptor
+ * 30:29 : reserved29 - MBZ
+ * 31 : reserved31 - MBZ
+ */
+ u32 len_ctrl;
+
+ /* 5:0 : req_id_hi
+ * 31:6 : reserved6 - MBZ
+ */
+ u32 word1;
+
+ /* 7:0 : l3_hdr_len
+ * 15:8 : l3_hdr_off
+	 * 21:16 : l4_hdr_len_in_words - counts the L4 header
+	 * length in words. There is an explicit assumption
+	 * that the L4 header appears right after the L3 header and
+	 * the L4 offset is based on l3_hdr_off + l3_hdr_len
+ * 31:22 : mss_lo
+ */
+ u32 word2;
+
+ u32 reserved;
+};
+
+struct ena_eth_io_tx_cdesc {
+ /* Request ID[15:0] */
+ u16 req_id;
+
+ u8 status;
+
+ /* flags
+ * 0 : phase
+ * 7:1 : reserved1
+ */
+ u8 flags;
+
+ u16 sub_qid;
+
+ u16 sq_head_idx;
+};
+
+struct ena_eth_io_rx_desc {
+ /* In bytes. 0 means 64KB */
+ u16 length;
+
+ /* MBZ */
+ u8 reserved2;
+
+ /* 0 : phase
+ * 1 : reserved1 - MBZ
+ * 2 : first - Indicates first descriptor in
+ * transaction
+ * 3 : last - Indicates last descriptor in transaction
+ * 4 : comp_req
+ * 5 : reserved5 - MBO
+ * 7:6 : reserved6 - MBZ
+ */
+ u8 ctrl;
+
+ u16 req_id;
+
+ /* MBZ */
+ u16 reserved6;
+
+ u32 buff_addr_lo;
+
+ u16 buff_addr_hi;
+
+ /* MBZ */
+ u16 reserved16_w3;
+};
+
+/* 4-word format. Note: all Ethernet parsing information is valid only when
+ * last=1
+ */
+struct ena_eth_io_rx_cdesc_base {
+ /* 4:0 : l3_proto_idx
+ * 6:5 : src_vlan_cnt
+ * 7 : reserved7 - MBZ
+ * 12:8 : l4_proto_idx
+	 * 13 : l3_csum_err - when set, either an L3
+	 * checksum error was detected, or the controller didn't
+	 * validate the checksum. This bit is valid only when
+	 * l3_proto_idx indicates an IPv4 packet
+	 * 14 : l4_csum_err - when set, either an L4
+	 * checksum error was detected, or the controller didn't
+	 * validate the checksum. This bit is valid only when
+	 * l4_proto_idx indicates a TCP/UDP packet and
+	 * ipv4_frag is not set. This bit is valid only when
+	 * l4_csum_checked below is set.
+	 * 15 : ipv4_frag - Indicates an IPv4 fragmented packet
+	 * 16 : l4_csum_checked - L4 checksum was verified
+	 * (could be OK or error); when cleared, the status of the
+	 * checksum is unknown
+ * 23:17 : reserved17 - MBZ
+ * 24 : phase
+ * 25 : l3_csum2 - second checksum engine result
+ * 26 : first - Indicates first descriptor in
+ * transaction
+ * 27 : last - Indicates last descriptor in
+ * transaction
+ * 29:28 : reserved28
+ * 30 : buffer - 0: Metadata descriptor. 1: Buffer
+ * Descriptor was used
+ * 31 : reserved31
+ */
+ u32 status;
+
+ u16 length;
+
+ u16 req_id;
+
+ /* 32-bit hash result */
+ u32 hash;
+
+ u16 sub_qid;
+
+ u8 offset;
+
+ u8 reserved;
+};
+
+/* 8-word format */
+struct ena_eth_io_rx_cdesc_ext {
+ struct ena_eth_io_rx_cdesc_base base;
+
+ u32 buff_addr_lo;
+
+ u16 buff_addr_hi;
+
+ u16 reserved16;
+
+ u32 reserved_w6;
+
+ u32 reserved_w7;
+};
+
+struct ena_eth_io_intr_reg {
+ /* 14:0 : rx_intr_delay
+ * 29:15 : tx_intr_delay
+ * 30 : intr_unmask
+ * 31 : reserved
+ */
+ u32 intr_control;
+};
+
+struct ena_eth_io_numa_node_cfg_reg {
+ /* 7:0 : numa
+ * 30:8 : reserved
+ * 31 : enabled
+ */
+ u32 numa_cfg;
+};
+
+/* tx_desc */
+#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16)
+#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23
+#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23)
+#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24
+#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26
+#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27
+#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27)
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28)
+#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0)
+#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4
+#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4)
+#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7
+#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7)
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14)
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22)
+#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24)
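+/* e.g., a single-descriptor 1514-byte packet with phase = 1 would compose
+ * len_ctrl roughly as:
+ *   (1514 & ENA_ETH_IO_TX_DESC_LENGTH_MASK) |
+ *   ((u32)1 << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) |
+ *   ENA_ETH_IO_TX_DESC_FIRST_MASK | ENA_ETH_IO_TX_DESC_LAST_MASK |
+ *   ENA_ETH_IO_TX_DESC_COMP_REQ_MASK
+ */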
+
+/* tx_meta_desc */
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0)
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14)
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16)
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20)
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21)
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23)
+#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24
+#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26
+#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27
+#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27)
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28)
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8)
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16)
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22)
+
+/* tx_cdesc */
+#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0)
+
+/* rx_desc */
+#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0)
+#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2
+#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2)
+#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3
+#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3)
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4)
+
+/* rx_cdesc_base */
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0)
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14)
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT 16
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK BIT(16)
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25)
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27)
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30)
+
+/* intr_reg */
+#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0)
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15)
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30)
+
+/* numa_node_cfg_reg */
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0)
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31)
+
+#endif /* _ENA_ETH_IO_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
new file mode 100644
index 0000000000..d671df4b76
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -0,0 +1,1066 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include <linux/ethtool.h>
+#include <linux/pci.h>
+
+#include "ena_netdev.h"
+
+struct ena_stats {
+ char name[ETH_GSTRING_LEN];
+ int stat_offset;
+};
+
+#define ENA_STAT_ENA_COM_ENTRY(stat) { \
+ .name = #stat, \
+ .stat_offset = offsetof(struct ena_com_stats_admin, stat) / sizeof(u64) \
+}
+
+#define ENA_STAT_ENTRY(stat, stat_type) { \
+ .name = #stat, \
+ .stat_offset = offsetof(struct ena_stats_##stat_type, stat) / sizeof(u64) \
+}
+
+#define ENA_STAT_HW_ENTRY(stat, stat_type) { \
+ .name = #stat, \
+ .stat_offset = offsetof(struct ena_admin_##stat_type, stat) / sizeof(u64) \
+}
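+
+/* stat_offset is stored in units of u64 words (offsetof / sizeof(u64)), so a
+ * stat can be fetched with plain pointer arithmetic, e.g.:
+ *   u64 *ptr = (u64 *)&ring->tx_stats + ena_stats->stat_offset;
+ */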
+
+#define ENA_STAT_RX_ENTRY(stat) \
+ ENA_STAT_ENTRY(stat, rx)
+
+#define ENA_STAT_TX_ENTRY(stat) \
+ ENA_STAT_ENTRY(stat, tx)
+
+#define ENA_STAT_GLOBAL_ENTRY(stat) \
+ ENA_STAT_ENTRY(stat, dev)
+
+#define ENA_STAT_ENI_ENTRY(stat) \
+ ENA_STAT_HW_ENTRY(stat, eni_stats)
+
+static const struct ena_stats ena_stats_global_strings[] = {
+ ENA_STAT_GLOBAL_ENTRY(tx_timeout),
+ ENA_STAT_GLOBAL_ENTRY(suspend),
+ ENA_STAT_GLOBAL_ENTRY(resume),
+ ENA_STAT_GLOBAL_ENTRY(wd_expired),
+ ENA_STAT_GLOBAL_ENTRY(interface_up),
+ ENA_STAT_GLOBAL_ENTRY(interface_down),
+ ENA_STAT_GLOBAL_ENTRY(admin_q_pause),
+};
+
+static const struct ena_stats ena_stats_eni_strings[] = {
+ ENA_STAT_ENI_ENTRY(bw_in_allowance_exceeded),
+ ENA_STAT_ENI_ENTRY(bw_out_allowance_exceeded),
+ ENA_STAT_ENI_ENTRY(pps_allowance_exceeded),
+ ENA_STAT_ENI_ENTRY(conntrack_allowance_exceeded),
+ ENA_STAT_ENI_ENTRY(linklocal_allowance_exceeded),
+};
+
+static const struct ena_stats ena_stats_tx_strings[] = {
+ ENA_STAT_TX_ENTRY(cnt),
+ ENA_STAT_TX_ENTRY(bytes),
+ ENA_STAT_TX_ENTRY(queue_stop),
+ ENA_STAT_TX_ENTRY(queue_wakeup),
+ ENA_STAT_TX_ENTRY(dma_mapping_err),
+ ENA_STAT_TX_ENTRY(linearize),
+ ENA_STAT_TX_ENTRY(linearize_failed),
+ ENA_STAT_TX_ENTRY(napi_comp),
+ ENA_STAT_TX_ENTRY(tx_poll),
+ ENA_STAT_TX_ENTRY(doorbells),
+ ENA_STAT_TX_ENTRY(prepare_ctx_err),
+ ENA_STAT_TX_ENTRY(bad_req_id),
+ ENA_STAT_TX_ENTRY(llq_buffer_copy),
+ ENA_STAT_TX_ENTRY(missed_tx),
+ ENA_STAT_TX_ENTRY(unmask_interrupt),
+};
+
+static const struct ena_stats ena_stats_rx_strings[] = {
+ ENA_STAT_RX_ENTRY(cnt),
+ ENA_STAT_RX_ENTRY(bytes),
+ ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
+ ENA_STAT_RX_ENTRY(csum_good),
+ ENA_STAT_RX_ENTRY(refil_partial),
+ ENA_STAT_RX_ENTRY(csum_bad),
+ ENA_STAT_RX_ENTRY(page_alloc_fail),
+ ENA_STAT_RX_ENTRY(skb_alloc_fail),
+ ENA_STAT_RX_ENTRY(dma_mapping_err),
+ ENA_STAT_RX_ENTRY(bad_desc_num),
+ ENA_STAT_RX_ENTRY(bad_req_id),
+ ENA_STAT_RX_ENTRY(empty_rx_ring),
+ ENA_STAT_RX_ENTRY(csum_unchecked),
+ ENA_STAT_RX_ENTRY(xdp_aborted),
+ ENA_STAT_RX_ENTRY(xdp_drop),
+ ENA_STAT_RX_ENTRY(xdp_pass),
+ ENA_STAT_RX_ENTRY(xdp_tx),
+ ENA_STAT_RX_ENTRY(xdp_invalid),
+ ENA_STAT_RX_ENTRY(xdp_redirect),
+};
+
+static const struct ena_stats ena_stats_ena_com_strings[] = {
+ ENA_STAT_ENA_COM_ENTRY(aborted_cmd),
+ ENA_STAT_ENA_COM_ENTRY(submitted_cmd),
+ ENA_STAT_ENA_COM_ENTRY(completed_cmd),
+ ENA_STAT_ENA_COM_ENTRY(out_of_space),
+ ENA_STAT_ENA_COM_ENTRY(no_completion),
+};
+
+#define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings)
+#define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings)
+#define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings)
+#define ENA_STATS_ARRAY_ENA_COM ARRAY_SIZE(ena_stats_ena_com_strings)
+#define ENA_STATS_ARRAY_ENI(adapter) ARRAY_SIZE(ena_stats_eni_strings)
+
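+/* Snapshot one 64-bit stat consistently: on 32-bit machines the u64_stats
+ * sequence counter forces a retry if the writer updated the value mid-read.
+ */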
+static void ena_safe_update_stat(u64 *src, u64 *dst,
+ struct u64_stats_sync *syncp)
+{
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(syncp);
+ *(dst) = *src;
+ } while (u64_stats_fetch_retry(syncp, start));
+}
+
+static void ena_queue_stats(struct ena_adapter *adapter, u64 **data)
+{
+ const struct ena_stats *ena_stats;
+ struct ena_ring *ring;
+
+ u64 *ptr;
+ int i, j;
+
+ for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
+ /* Tx stats */
+ ring = &adapter->tx_ring[i];
+
+ for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
+ ena_stats = &ena_stats_tx_strings[j];
+
+ ptr = (u64 *)&ring->tx_stats + ena_stats->stat_offset;
+
+ ena_safe_update_stat(ptr, (*data)++, &ring->syncp);
+ }
+		/* XDP TX queues don't have an RX queue counterpart */
+ if (!ENA_IS_XDP_INDEX(adapter, i)) {
+ /* Rx stats */
+ ring = &adapter->rx_ring[i];
+
+ for (j = 0; j < ENA_STATS_ARRAY_RX; j++) {
+ ena_stats = &ena_stats_rx_strings[j];
+
+ ptr = (u64 *)&ring->rx_stats +
+ ena_stats->stat_offset;
+
+ ena_safe_update_stat(ptr, (*data)++, &ring->syncp);
+ }
+ }
+ }
+}
+
+static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data)
+{
+ const struct ena_stats *ena_stats;
+ u64 *ptr;
+ int i;
+
+ for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
+ ena_stats = &ena_stats_ena_com_strings[i];
+
+ ptr = (u64 *)&adapter->ena_dev->admin_queue.stats +
+ ena_stats->stat_offset;
+
+ *(*data)++ = *ptr;
+ }
+}
+
+static void ena_get_stats(struct ena_adapter *adapter,
+ u64 *data,
+ bool eni_stats_needed)
+{
+ const struct ena_stats *ena_stats;
+ u64 *ptr;
+ int i;
+
+ for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
+ ena_stats = &ena_stats_global_strings[i];
+
+ ptr = (u64 *)&adapter->dev_stats + ena_stats->stat_offset;
+
+ ena_safe_update_stat(ptr, data++, &adapter->syncp);
+ }
+
+ if (eni_stats_needed) {
+ ena_update_hw_stats(adapter);
+ for (i = 0; i < ENA_STATS_ARRAY_ENI(adapter); i++) {
+ ena_stats = &ena_stats_eni_strings[i];
+
+ ptr = (u64 *)&adapter->eni_stats +
+ ena_stats->stat_offset;
+
+ ena_safe_update_stat(ptr, data++, &adapter->syncp);
+ }
+ }
+
+ ena_queue_stats(adapter, &data);
+ ena_dev_admin_queue_stats(adapter, &data);
+}
+
+static void ena_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ struct ena_com_dev *dev = adapter->ena_dev;
+
+ ena_get_stats(adapter, data, ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS));
+}
+
+static int ena_get_sw_stats_count(struct ena_adapter *adapter)
+{
+ return adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
+ + adapter->xdp_num_queues * ENA_STATS_ARRAY_TX
+ + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM;
+}
+
+static int ena_get_hw_stats_count(struct ena_adapter *adapter)
+{
+ bool supported = ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENI_STATS);
+
+ return ENA_STATS_ARRAY_ENI(adapter) * supported;
+}
+
+int ena_get_sset_count(struct net_device *netdev, int sset)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ena_get_sw_stats_count(adapter) +
+ ena_get_hw_stats_count(adapter);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
+{
+ const struct ena_stats *ena_stats;
+ bool is_xdp;
+ int i, j;
+
+ for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
+ is_xdp = ENA_IS_XDP_INDEX(adapter, i);
+ /* Tx stats */
+ for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
+ ena_stats = &ena_stats_tx_strings[j];
+
+ ethtool_sprintf(data,
+ "queue_%u_%s_%s", i,
+ is_xdp ? "xdp_tx" : "tx",
+ ena_stats->name);
+ }
+
+ if (!is_xdp) {
+			/* RX stats; in XDP there isn't an RX queue
+			 * counterpart
+			 */
+ for (j = 0; j < ENA_STATS_ARRAY_RX; j++) {
+ ena_stats = &ena_stats_rx_strings[j];
+
+ ethtool_sprintf(data,
+ "queue_%u_rx_%s", i,
+ ena_stats->name);
+ }
+ }
+ }
+}
+
+static void ena_com_dev_strings(u8 **data)
+{
+ const struct ena_stats *ena_stats;
+ int i;
+
+ for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
+ ena_stats = &ena_stats_ena_com_strings[i];
+
+ ethtool_sprintf(data,
+ "ena_admin_q_%s", ena_stats->name);
+ }
+}
+
+static void ena_get_strings(struct ena_adapter *adapter,
+ u8 *data,
+ bool eni_stats_needed)
+{
+ const struct ena_stats *ena_stats;
+ int i;
+
+ for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
+ ena_stats = &ena_stats_global_strings[i];
+ ethtool_sprintf(&data, ena_stats->name);
+ }
+
+ if (eni_stats_needed) {
+ for (i = 0; i < ENA_STATS_ARRAY_ENI(adapter); i++) {
+ ena_stats = &ena_stats_eni_strings[i];
+ ethtool_sprintf(&data, ena_stats->name);
+ }
+ }
+
+ ena_queue_strings(adapter, &data);
+ ena_com_dev_strings(&data);
+}
+
+static void ena_get_ethtool_strings(struct net_device *netdev,
+ u32 sset,
+ u8 *data)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ struct ena_com_dev *dev = adapter->ena_dev;
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ ena_get_strings(adapter, data, ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS));
+ break;
+ }
+}
+
+static int ena_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ struct ena_admin_get_feature_link_desc *link;
+ struct ena_admin_get_feat_resp feat_resp;
+ int rc;
+
+ rc = ena_com_get_link_params(ena_dev, &feat_resp);
+ if (rc)
+ return rc;
+
+ link = &feat_resp.u.link;
+ link_ksettings->base.speed = link->speed;
+
+ if (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+						     advertising, Autoneg);
+ }
+
+ link_ksettings->base.autoneg =
+ (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) ?
+ AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+ link_ksettings->base.duplex = DUPLEX_FULL;
+
+ return 0;
+}
+
+static int ena_get_coalesce(struct net_device *net_dev,
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct ena_adapter *adapter = netdev_priv(net_dev);
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+
+ if (!ena_com_interrupt_moderation_supported(ena_dev))
+ return -EOPNOTSUPP;
+
+ coalesce->tx_coalesce_usecs =
+ ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) *
+ ena_dev->intr_delay_resolution;
+
+ coalesce->rx_coalesce_usecs =
+ ena_com_get_nonadaptive_moderation_interval_rx(ena_dev)
+ * ena_dev->intr_delay_resolution;
+
+ coalesce->use_adaptive_rx_coalesce =
+ ena_com_get_adaptive_moderation_enabled(ena_dev);
+
+ return 0;
+}
+
+static void ena_update_tx_rings_nonadaptive_intr_moderation(struct ena_adapter *adapter)
+{
+ unsigned int val;
+ int i;
+
+ val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev);
+
+ for (i = 0; i < adapter->num_io_queues; i++)
+ adapter->tx_ring[i].smoothed_interval = val;
+}
+
+static void ena_update_rx_rings_nonadaptive_intr_moderation(struct ena_adapter *adapter)
+{
+ unsigned int val;
+ int i;
+
+ val = ena_com_get_nonadaptive_moderation_interval_rx(adapter->ena_dev);
+
+ for (i = 0; i < adapter->num_io_queues; i++)
+ adapter->rx_ring[i].smoothed_interval = val;
+}
+
+static int ena_set_coalesce(struct net_device *net_dev,
+ struct ethtool_coalesce *coalesce,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct ena_adapter *adapter = netdev_priv(net_dev);
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ int rc;
+
+ if (!ena_com_interrupt_moderation_supported(ena_dev))
+ return -EOPNOTSUPP;
+
+ rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev,
+ coalesce->tx_coalesce_usecs);
+ if (rc)
+ return rc;
+
+ ena_update_tx_rings_nonadaptive_intr_moderation(adapter);
+
+ rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
+ coalesce->rx_coalesce_usecs);
+ if (rc)
+ return rc;
+
+ ena_update_rx_rings_nonadaptive_intr_moderation(adapter);
+
+ if (coalesce->use_adaptive_rx_coalesce &&
+ !ena_com_get_adaptive_moderation_enabled(ena_dev))
+ ena_com_enable_adaptive_moderation(ena_dev);
+
+ if (!coalesce->use_adaptive_rx_coalesce &&
+ ena_com_get_adaptive_moderation_enabled(ena_dev))
+ ena_com_disable_adaptive_moderation(ena_dev);
+
+ return 0;
+}
+
+static u32 ena_get_msglevel(struct net_device *netdev)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+
+ return adapter->msg_enable;
+}
+
+static void ena_set_msglevel(struct net_device *netdev, u32 value)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+
+ adapter->msg_enable = value;
+}
+
+static void ena_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ struct ena_adapter *adapter = netdev_priv(dev);
+
+ strscpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
+ strscpy(info->bus_info, pci_name(adapter->pdev),
+ sizeof(info->bus_info));
+}
+
+static void ena_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+
+ ring->tx_max_pending = adapter->max_tx_ring_size;
+ ring->rx_max_pending = adapter->max_rx_ring_size;
+ if (adapter->ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+ bool large_llq_supported = adapter->large_llq_header_supported;
+
+ kernel_ring->tx_push = true;
+ kernel_ring->tx_push_buf_len = adapter->ena_dev->tx_max_header_size;
+ if (large_llq_supported)
+ kernel_ring->tx_push_buf_max_len = ENA_LLQ_LARGE_HEADER;
+ else
+ kernel_ring->tx_push_buf_max_len = ENA_LLQ_HEADER;
+ } else {
+ kernel_ring->tx_push = false;
+ kernel_ring->tx_push_buf_max_len = 0;
+ kernel_ring->tx_push_buf_len = 0;
+ }
+
+ ring->tx_pending = adapter->tx_ring[0].ring_size;
+ ring->rx_pending = adapter->rx_ring[0].ring_size;
+}
+
+static int ena_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ u32 new_tx_size, new_rx_size, new_tx_push_buf_len;
+ bool changed = false;
+
+ new_tx_size = ring->tx_pending < ENA_MIN_RING_SIZE ?
+ ENA_MIN_RING_SIZE : ring->tx_pending;
+ new_tx_size = rounddown_pow_of_two(new_tx_size);
+
+ new_rx_size = ring->rx_pending < ENA_MIN_RING_SIZE ?
+ ENA_MIN_RING_SIZE : ring->rx_pending;
+ new_rx_size = rounddown_pow_of_two(new_rx_size);
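+	/* e.g., a requested ring size of 1000 is rounded down to 512 above */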
+
+ changed |= new_tx_size != adapter->requested_tx_ring_size ||
+ new_rx_size != adapter->requested_rx_ring_size;
+
+ /* This value is ignored if LLQ is not supported */
+ new_tx_push_buf_len = adapter->ena_dev->tx_max_header_size;
+
+ if ((adapter->ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) !=
+ kernel_ring->tx_push) {
+ NL_SET_ERR_MSG_MOD(extack, "Push mode state cannot be modified");
+ return -EINVAL;
+ }
+
+ /* Validate that the push buffer is supported on the underlying device */
+ if (kernel_ring->tx_push_buf_len) {
+ enum ena_admin_placement_policy_type placement;
+
+ new_tx_push_buf_len = kernel_ring->tx_push_buf_len;
+
+ placement = adapter->ena_dev->tx_mem_queue_type;
+ if (placement == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+ return -EOPNOTSUPP;
+
+ if (new_tx_push_buf_len != ENA_LLQ_HEADER &&
+ new_tx_push_buf_len != ENA_LLQ_LARGE_HEADER) {
+ bool large_llq_sup = adapter->large_llq_header_supported;
+ char large_llq_size_str[40];
+
+ snprintf(large_llq_size_str, 40, ", %lu", ENA_LLQ_LARGE_HEADER);
+
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Supported tx push buff values: [%lu%s]",
+ ENA_LLQ_HEADER,
+ large_llq_sup ? large_llq_size_str : "");
+
+ return -EINVAL;
+ }
+
+ changed |= new_tx_push_buf_len != adapter->ena_dev->tx_max_header_size;
+ }
+
+ if (!changed)
+ return 0;
+
+ return ena_update_queue_params(adapter, new_tx_size, new_rx_size,
+ new_tx_push_buf_len);
+}
+
+static u32 ena_flow_hash_to_flow_type(u16 hash_fields)
+{
+ u32 data = 0;
+
+ if (hash_fields & ENA_ADMIN_RSS_L2_DA)
+ data |= RXH_L2DA;
+
+ if (hash_fields & ENA_ADMIN_RSS_L3_DA)
+ data |= RXH_IP_DST;
+
+ if (hash_fields & ENA_ADMIN_RSS_L3_SA)
+ data |= RXH_IP_SRC;
+
+ if (hash_fields & ENA_ADMIN_RSS_L4_DP)
+ data |= RXH_L4_B_2_3;
+
+ if (hash_fields & ENA_ADMIN_RSS_L4_SP)
+ data |= RXH_L4_B_0_1;
+
+ return data;
+}
+
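+/* e.g., the 4-tuple requested by "ethtool -N <iface> rx-flow-hash tcp4 sdfn"
+ * (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3) maps to
+ * ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | ENA_ADMIN_RSS_L4_SP |
+ * ENA_ADMIN_RSS_L4_DP
+ */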
+static u16 ena_flow_data_to_flow_hash(u32 hash_fields)
+{
+ u16 data = 0;
+
+ if (hash_fields & RXH_L2DA)
+ data |= ENA_ADMIN_RSS_L2_DA;
+
+ if (hash_fields & RXH_IP_DST)
+ data |= ENA_ADMIN_RSS_L3_DA;
+
+ if (hash_fields & RXH_IP_SRC)
+ data |= ENA_ADMIN_RSS_L3_SA;
+
+ if (hash_fields & RXH_L4_B_2_3)
+ data |= ENA_ADMIN_RSS_L4_DP;
+
+ if (hash_fields & RXH_L4_B_0_1)
+ data |= ENA_ADMIN_RSS_L4_SP;
+
+ return data;
+}
+
+static int ena_get_rss_hash(struct ena_com_dev *ena_dev,
+ struct ethtool_rxnfc *cmd)
+{
+ enum ena_admin_flow_hash_proto proto;
+ u16 hash_fields;
+ int rc;
+
+ cmd->data = 0;
+
+ switch (cmd->flow_type) {
+ case TCP_V4_FLOW:
+ proto = ENA_ADMIN_RSS_TCP4;
+ break;
+ case UDP_V4_FLOW:
+ proto = ENA_ADMIN_RSS_UDP4;
+ break;
+ case TCP_V6_FLOW:
+ proto = ENA_ADMIN_RSS_TCP6;
+ break;
+ case UDP_V6_FLOW:
+ proto = ENA_ADMIN_RSS_UDP6;
+ break;
+ case IPV4_FLOW:
+ proto = ENA_ADMIN_RSS_IP4;
+ break;
+ case IPV6_FLOW:
+ proto = ENA_ADMIN_RSS_IP6;
+ break;
+ case ETHER_FLOW:
+ proto = ENA_ADMIN_RSS_NOT_IP;
+ break;
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case SCTP_V4_FLOW:
+ case AH_ESP_V4_FLOW:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+
+ rc = ena_com_get_hash_ctrl(ena_dev, proto, &hash_fields);
+ if (rc)
+ return rc;
+
+ cmd->data = ena_flow_hash_to_flow_type(hash_fields);
+
+ return 0;
+}
+
+static int ena_set_rss_hash(struct ena_com_dev *ena_dev,
+ struct ethtool_rxnfc *cmd)
+{
+ enum ena_admin_flow_hash_proto proto;
+ u16 hash_fields;
+
+ switch (cmd->flow_type) {
+ case TCP_V4_FLOW:
+ proto = ENA_ADMIN_RSS_TCP4;
+ break;
+ case UDP_V4_FLOW:
+ proto = ENA_ADMIN_RSS_UDP4;
+ break;
+ case TCP_V6_FLOW:
+ proto = ENA_ADMIN_RSS_TCP6;
+ break;
+ case UDP_V6_FLOW:
+ proto = ENA_ADMIN_RSS_UDP6;
+ break;
+ case IPV4_FLOW:
+ proto = ENA_ADMIN_RSS_IP4;
+ break;
+ case IPV6_FLOW:
+ proto = ENA_ADMIN_RSS_IP6;
+ break;
+ case ETHER_FLOW:
+ proto = ENA_ADMIN_RSS_NOT_IP;
+ break;
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case SCTP_V4_FLOW:
+ case AH_ESP_V4_FLOW:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+
+ hash_fields = ena_flow_data_to_flow_hash(cmd->data);
+
+ return ena_com_fill_hash_ctrl(ena_dev, proto, hash_fields);
+}
+
+static int ena_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ int rc = 0;
+
+ switch (info->cmd) {
+ case ETHTOOL_SRXFH:
+ rc = ena_set_rss_hash(adapter->ena_dev, info);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ case ETHTOOL_SRXCLSRLINS:
+ default:
+ netif_err(adapter, drv, netdev,
+ "Command parameter %d is not supported\n", info->cmd);
+ rc = -EOPNOTSUPP;
+ }
+
+ return rc;
+}
+
+static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info,
+ u32 *rules)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ int rc = 0;
+
+ switch (info->cmd) {
+ case ETHTOOL_GRXRINGS:
+ info->data = adapter->num_io_queues;
+ rc = 0;
+ break;
+ case ETHTOOL_GRXFH:
+ rc = ena_get_rss_hash(adapter->ena_dev, info);
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ case ETHTOOL_GRXCLSRULE:
+ case ETHTOOL_GRXCLSRLALL:
+ default:
+ netif_err(adapter, drv, netdev,
+ "Command parameter %d is not supported\n", info->cmd);
+ rc = -EOPNOTSUPP;
+ }
+
+ return rc;
+}
+
+static u32 ena_get_rxfh_indir_size(struct net_device *netdev)
+{
+ return ENA_RX_RSS_TABLE_SIZE;
+}
+
+static u32 ena_get_rxfh_key_size(struct net_device *netdev)
+{
+ return ENA_HASH_KEY_SIZE;
+}
+
+static int ena_indirection_table_set(struct ena_adapter *adapter,
+ const u32 *indir)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ int i, rc;
+
+ for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
+ rc = ena_com_indirect_table_fill_entry(ena_dev,
+ i,
+ ENA_IO_RXQ_IDX(indir[i]));
+ if (unlikely(rc)) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Cannot fill indirect table (index is too large)\n");
+ return rc;
+ }
+ }
+
+ rc = ena_com_indirect_table_set(ena_dev);
+ if (rc) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Cannot set indirect table\n");
+ return rc == -EPERM ? -EOPNOTSUPP : rc;
+ }
+ return rc;
+}
+
+static int ena_indirection_table_get(struct ena_adapter *adapter, u32 *indir)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ int i, rc;
+
+ if (!indir)
+ return 0;
+
+ rc = ena_com_indirect_table_get(ena_dev, indir);
+ if (rc)
+ return rc;
+
+	/* Our internal representation of the indices is: even indices
+	 * for Tx and odd indices for Rx. We need to convert the Rx
+	 * indices to be consecutive
+	 */
+ for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++)
+ indir[i] = ENA_IO_RXQ_IDX_TO_COMBINED_IDX(indir[i]);
+
+ return rc;
+}
+
+static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ enum ena_admin_hash_functions ena_func;
+ u8 func;
+ int rc;
+
+ rc = ena_indirection_table_get(adapter, indir);
+ if (rc)
+ return rc;
+
+ /* We call this function in order to check if the device
+ * supports getting/setting the hash function.
+ */
+ rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
+ if (rc) {
+ if (rc == -EOPNOTSUPP)
+ rc = 0;
+
+ return rc;
+ }
+
+ rc = ena_com_get_hash_key(adapter->ena_dev, key);
+ if (rc)
+ return rc;
+
+ switch (ena_func) {
+ case ENA_ADMIN_TOEPLITZ:
+ func = ETH_RSS_HASH_TOP;
+ break;
+ case ENA_ADMIN_CRC32:
+ func = ETH_RSS_HASH_CRC32;
+ break;
+ default:
+ netif_err(adapter, drv, netdev,
+ "Command parameter is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (hfunc)
+ *hfunc = func;
+
+ return 0;
+}
+
+static int ena_set_rxfh(struct net_device *netdev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ enum ena_admin_hash_functions func = 0;
+ int rc;
+
+ if (indir) {
+ rc = ena_indirection_table_set(adapter, indir);
+ if (rc)
+ return rc;
+ }
+
+ switch (hfunc) {
+ case ETH_RSS_HASH_NO_CHANGE:
+ func = ena_com_get_current_hash_function(ena_dev);
+ break;
+ case ETH_RSS_HASH_TOP:
+ func = ENA_ADMIN_TOEPLITZ;
+ break;
+ case ETH_RSS_HASH_CRC32:
+ func = ENA_ADMIN_CRC32;
+ break;
+ default:
+ netif_err(adapter, drv, netdev, "Unsupported hfunc %d\n",
+ hfunc);
+ return -EOPNOTSUPP;
+ }
+
+ if (key || func) {
+ rc = ena_com_fill_hash_function(ena_dev, func, key,
+ ENA_HASH_KEY_SIZE,
+ 0xFFFFFFFF);
+ if (unlikely(rc)) {
+ netif_err(adapter, drv, netdev, "Cannot fill key\n");
+ return rc == -EPERM ? -EOPNOTSUPP : rc;
+ }
+ }
+
+ return 0;
+}
+
+static void ena_get_channels(struct net_device *netdev,
+ struct ethtool_channels *channels)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+
+ channels->max_combined = adapter->max_num_io_queues;
+ channels->combined_count = adapter->num_io_queues;
+}
+
+static int ena_set_channels(struct net_device *netdev,
+ struct ethtool_channels *channels)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ u32 count = channels->combined_count;
+
+	/* The check for max value is already done in ethtool */
+ if (count < ENA_MIN_NUM_IO_QUEUES)
+ return -EINVAL;
+
+ if (!ena_xdp_legal_queue_count(adapter, count)) {
+ if (ena_xdp_present(adapter))
+ return -EINVAL;
+
+ xdp_clear_features_flag(netdev);
+ } else {
+ xdp_set_features_flag(netdev,
+ NETDEV_XDP_ACT_BASIC |
+ NETDEV_XDP_ACT_REDIRECT);
+ }
+
+ return ena_update_queue_count(adapter, count);
+}
+
+static int ena_get_tunable(struct net_device *netdev,
+ const struct ethtool_tunable *tuna, void *data)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ int ret = 0;
+
+ switch (tuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ *(u32 *)data = adapter->rx_copybreak;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int ena_set_tunable(struct net_device *netdev,
+ const struct ethtool_tunable *tuna,
+ const void *data)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ int ret = 0;
+ u32 len;
+
+ switch (tuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ len = *(u32 *)data;
+ ret = ena_set_rx_copybreak(adapter, len);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static const struct ethtool_ops ena_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
+ .supported_ring_params = ETHTOOL_RING_USE_TX_PUSH_BUF_LEN |
+ ETHTOOL_RING_USE_TX_PUSH,
+ .get_link_ksettings = ena_get_link_ksettings,
+ .get_drvinfo = ena_get_drvinfo,
+ .get_msglevel = ena_get_msglevel,
+ .set_msglevel = ena_set_msglevel,
+ .get_link = ethtool_op_get_link,
+ .get_coalesce = ena_get_coalesce,
+ .set_coalesce = ena_set_coalesce,
+ .get_ringparam = ena_get_ringparam,
+ .set_ringparam = ena_set_ringparam,
+ .get_sset_count = ena_get_sset_count,
+ .get_strings = ena_get_ethtool_strings,
+ .get_ethtool_stats = ena_get_ethtool_stats,
+ .get_rxnfc = ena_get_rxnfc,
+ .set_rxnfc = ena_set_rxnfc,
+ .get_rxfh_indir_size = ena_get_rxfh_indir_size,
+ .get_rxfh_key_size = ena_get_rxfh_key_size,
+ .get_rxfh = ena_get_rxfh,
+ .set_rxfh = ena_set_rxfh,
+ .get_channels = ena_get_channels,
+ .set_channels = ena_set_channels,
+ .get_tunable = ena_get_tunable,
+ .set_tunable = ena_set_tunable,
+ .get_ts_info = ethtool_op_get_ts_info,
+};
+
+void ena_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &ena_ethtool_ops;
+}
+
+static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf)
+{
+ struct net_device *netdev = adapter->netdev;
+ u8 *strings_buf;
+ u64 *data_buf;
+ int strings_num;
+ int i, rc;
+
+ strings_num = ena_get_sw_stats_count(adapter);
+ if (strings_num <= 0) {
+ netif_err(adapter, drv, netdev, "Can't get stats num\n");
+ return;
+ }
+
+ strings_buf = devm_kcalloc(&adapter->pdev->dev,
+ ETH_GSTRING_LEN, strings_num,
+ GFP_ATOMIC);
+ if (!strings_buf) {
+ netif_err(adapter, drv, netdev,
+ "Failed to allocate strings_buf\n");
+ return;
+ }
+
+ data_buf = devm_kcalloc(&adapter->pdev->dev,
+ strings_num, sizeof(u64),
+ GFP_ATOMIC);
+ if (!data_buf) {
+ netif_err(adapter, drv, netdev,
+ "Failed to allocate data buf\n");
+ devm_kfree(&adapter->pdev->dev, strings_buf);
+ return;
+ }
+
+ ena_get_strings(adapter, strings_buf, false);
+ ena_get_stats(adapter, data_buf, false);
+
+ /* If there is a buffer, dump stats, otherwise print them to dmesg */
+ if (buf)
+ for (i = 0; i < strings_num; i++) {
+ rc = snprintf(buf, ETH_GSTRING_LEN + sizeof(u64),
+ "%s %llu\n",
+ strings_buf + i * ETH_GSTRING_LEN,
+ data_buf[i]);
+ buf += rc;
+ }
+ else
+ for (i = 0; i < strings_num; i++)
+ netif_err(adapter, drv, netdev, "%s: %llu\n",
+ strings_buf + i * ETH_GSTRING_LEN,
+ data_buf[i]);
+
+ devm_kfree(&adapter->pdev->dev, strings_buf);
+ devm_kfree(&adapter->pdev->dev, data_buf);
+}
+
+void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf)
+{
+ if (!buf)
+ return;
+
+ ena_dump_stats_ex(adapter, buf);
+}
+
+void ena_dump_stats_to_dmesg(struct ena_adapter *adapter)
+{
+ ena_dump_stats_ex(adapter, NULL);
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
new file mode 100644
index 0000000000..14e41eb577
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -0,0 +1,4832 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif /* CONFIG_RFS_ACCEL */
+#include <linux/ethtool.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/numa.h>
+#include <linux/pci.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+#include <net/ip.h>
+
+#include "ena_netdev.h"
+#include <linux/bpf_trace.h>
+#include "ena_pci_id_tbl.h"
+
+MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
+MODULE_DESCRIPTION(DEVICE_NAME);
+MODULE_LICENSE("GPL");
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT (5 * HZ)
+
+#define ENA_MAX_RINGS min_t(unsigned int, ENA_MAX_NUM_IO_QUEUES, num_possible_cpus())
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
+ NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
+
+static struct ena_aenq_handlers aenq_handlers;
+
+static struct workqueue_struct *ena_wq;
+
+MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
+
+static int ena_rss_init_default(struct ena_adapter *adapter);
+static void check_for_admin_com_state(struct ena_adapter *adapter);
+static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
+static int ena_restore_device(struct ena_adapter *adapter);
+
+static void ena_init_io_rings(struct ena_adapter *adapter,
+ int first_index, int count);
+static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
+ int count);
+static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
+ int count);
+static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
+static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count);
+static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
+static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
+static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
+static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
+static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
+static void ena_napi_disable_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+static void ena_napi_enable_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+static int ena_up(struct ena_adapter *adapter);
+static void ena_down(struct ena_adapter *adapter);
+static void ena_unmask_interrupt(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring);
+static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring);
+static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info);
+static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+
+/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */
+static void ena_increase_stat(u64 *statp, u64 cnt,
+ struct u64_stats_sync *syncp)
+{
+ u64_stats_update_begin(syncp);
+ (*statp) += cnt;
+ u64_stats_update_end(syncp);
+}
+
+static void ena_ring_tx_doorbell(struct ena_ring *tx_ring)
+{
+ ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+ ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp);
+}
+
+static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+ struct ena_adapter *adapter = netdev_priv(dev);
+
+	/* Change the state of the device to trigger a reset.
+	 * Check that we are not already in the middle of a reset trigger.
+	 */
+
+ if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+ return;
+
+ ena_reset_device(adapter, ENA_REGS_RESET_OS_NETDEV_WD);
+ ena_increase_stat(&adapter->dev_stats.tx_timeout, 1, &adapter->syncp);
+
+	netif_err(adapter, tx_err, dev, "Transmit timed out\n");
+}
+
+static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_io_queues; i++)
+ adapter->rx_ring[i].mtu = mtu;
+}
+
+static int ena_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct ena_adapter *adapter = netdev_priv(dev);
+ int ret;
+
+ ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
+ if (!ret) {
+ netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu);
+ update_rx_ring_mtu(adapter, new_mtu);
+ dev->mtu = new_mtu;
+ } else {
+ netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
+ new_mtu);
+ }
+
+ return ret;
+}
+
+static int ena_xmit_common(struct net_device *dev,
+ struct ena_ring *ring,
+ struct ena_tx_buffer *tx_info,
+ struct ena_com_tx_ctx *ena_tx_ctx,
+ u16 next_to_use,
+ u32 bytes)
+{
+ struct ena_adapter *adapter = netdev_priv(dev);
+ int rc, nb_hw_desc;
+
+ if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
+ ena_tx_ctx))) {
+ netif_dbg(adapter, tx_queued, dev,
+ "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
+ ring->qid);
+ ena_ring_tx_doorbell(ring);
+ }
+
+	/* Prepare the packet's descriptors for the DMA engine */
+ rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
+ &nb_hw_desc);
+
+ /* In case there isn't enough space in the queue for the packet,
+ * we simply drop it. All other failure reasons of
+ * ena_com_prepare_tx() are fatal and therefore require a device reset.
+ */
+ if (unlikely(rc)) {
+ netif_err(adapter, tx_queued, dev,
+ "Failed to prepare tx bufs\n");
+ ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1,
+ &ring->syncp);
+ if (rc != -ENOMEM)
+ ena_reset_device(adapter,
+ ENA_REGS_RESET_DRIVER_INVALID_STATE);
+ return rc;
+ }
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->tx_stats.cnt++;
+ ring->tx_stats.bytes += bytes;
+ u64_stats_update_end(&ring->syncp);
+
+ tx_info->tx_descs = nb_hw_desc;
+ tx_info->last_jiffies = jiffies;
+ tx_info->print_once = 0;
+
+ ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
+ ring->ring_size);
+ return 0;
+}
+
+/* This is the XDP napi callback. XDP queues use a napi callback
+ * separate from the one used by the Rx/Tx queues.
+ */
+static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
+{
+ struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+ u32 xdp_work_done, xdp_budget;
+ struct ena_ring *xdp_ring;
+ int napi_comp_call = 0;
+ int ret;
+
+ xdp_ring = ena_napi->xdp_ring;
+
+ xdp_budget = budget;
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
+ test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
+ napi_complete_done(napi, 0);
+ return 0;
+ }
+
+ xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
+
+	/* If the device is about to reset or is down, avoid unmasking
+	 * the interrupt and return 0 so NAPI won't reschedule
+	 */
+ if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
+ napi_complete_done(napi, 0);
+ ret = 0;
+ } else if (xdp_budget > xdp_work_done) {
+ napi_comp_call = 1;
+ if (napi_complete_done(napi, xdp_work_done))
+ ena_unmask_interrupt(xdp_ring, NULL);
+ ena_update_ring_numa_node(xdp_ring, NULL);
+ ret = xdp_work_done;
+ } else {
+ ret = xdp_budget;
+ }
+
+ u64_stats_update_begin(&xdp_ring->syncp);
+ xdp_ring->tx_stats.napi_comp += napi_comp_call;
+ xdp_ring->tx_stats.tx_poll++;
+ u64_stats_update_end(&xdp_ring->syncp);
+ xdp_ring->tx_stats.last_napi_jiffies = jiffies;
+
+ return ret;
+}
+
+static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
+ struct ena_tx_buffer *tx_info,
+ struct xdp_frame *xdpf,
+ struct ena_com_tx_ctx *ena_tx_ctx)
+{
+ struct ena_adapter *adapter = xdp_ring->adapter;
+ struct ena_com_buf *ena_buf;
+ int push_len = 0;
+ dma_addr_t dma;
+ void *data;
+ u32 size;
+
+ tx_info->xdpf = xdpf;
+ data = tx_info->xdpf->data;
+ size = tx_info->xdpf->len;
+
+ if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+ /* Designate part of the packet for LLQ */
+ push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
+
+ ena_tx_ctx->push_header = data;
+
+ size -= push_len;
+ data += push_len;
+ }
+
+ ena_tx_ctx->header_len = push_len;
+
+ if (size > 0) {
+ dma = dma_map_single(xdp_ring->dev,
+ data,
+ size,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
+ goto error_report_dma_error;
+
+ tx_info->map_linear_data = 0;
+
+ ena_buf = tx_info->bufs;
+ ena_buf->paddr = dma;
+ ena_buf->len = size;
+
+ ena_tx_ctx->ena_bufs = ena_buf;
+ ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
+ }
+
+ return 0;
+
+error_report_dma_error:
+ ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1,
+ &xdp_ring->syncp);
+ netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
+
+ return -EINVAL;
+}
+
+static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+ struct net_device *dev,
+ struct xdp_frame *xdpf,
+ int flags)
+{
+ struct ena_com_tx_ctx ena_tx_ctx = {};
+ struct ena_tx_buffer *tx_info;
+ u16 next_to_use, req_id;
+ int rc;
+
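+ /* next_to_use indexes the free_ids ring; the value stored there is
+ * the req_id, i.e. the slot in tx_buffer_info used for this frame.
+ */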
+ next_to_use = xdp_ring->next_to_use;
+ req_id = xdp_ring->free_ids[next_to_use];
+ tx_info = &xdp_ring->tx_buffer_info[req_id];
+ tx_info->num_of_bufs = 0;
+
+ rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
+ if (unlikely(rc))
+ return rc;
+
+ ena_tx_ctx.req_id = req_id;
+
+ rc = ena_xmit_common(dev,
+ xdp_ring,
+ tx_info,
+ &ena_tx_ctx,
+ next_to_use,
+ xdpf->len);
+ if (rc)
+ goto error_unmap_dma;
+
+ /* Trigger the DMA engine. ena_ring_tx_doorbell()
+ * issues a memory barrier internally.
+ */
+ if (flags & XDP_XMIT_FLUSH)
+ ena_ring_tx_doorbell(xdp_ring);
+
+ return rc;
+
+error_unmap_dma:
+ ena_unmap_tx_buff(xdp_ring, tx_info);
+ tx_info->xdpf = NULL;
+ return rc;
+}
+
+static int ena_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags)
+{
+ struct ena_adapter *adapter = netdev_priv(dev);
+ struct ena_ring *xdp_ring;
+ int qid, i, nxmit = 0;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+ return -ENETDOWN;
+
+ /* We assume that all rings have the same XDP program */
+ if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog))
+ return -ENXIO;
+
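+ /* Pick an XDP TX queue based on the executing CPU. Several CPUs
+ * may map to the same queue, hence the xdp_tx_lock below.
+ */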
+ qid = smp_processor_id() % adapter->xdp_num_queues;
+ qid += adapter->xdp_first_ring;
+ xdp_ring = &adapter->tx_ring[qid];
+
+ /* Other CPUs might try to send through this queue */
+ spin_lock(&xdp_ring->xdp_tx_lock);
+
+ for (i = 0; i < n; i++) {
+ if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0))
+ break;
+ nxmit++;
+ }
+
+ /* Ring doorbell to make device aware of the packets */
+ if (flags & XDP_XMIT_FLUSH)
+ ena_ring_tx_doorbell(xdp_ring);
+
+ spin_unlock(&xdp_ring->xdp_tx_lock);
+
+ /* Return number of packets sent */
+ return nxmit;
+}
+
+static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+{
+ u32 verdict = ENA_XDP_PASS;
+ struct bpf_prog *xdp_prog;
+ struct ena_ring *xdp_ring;
+ struct xdp_frame *xdpf;
+ u64 *xdp_stat;
+
+ xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
+
+ if (!xdp_prog)
+ goto out;
+
+ verdict = bpf_prog_run_xdp(xdp_prog, xdp);
+
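+ /* Map the program's verdict to the driver's internal ENA_XDP_* flags;
+ * anything unexpected (or a failed TX/REDIRECT) degrades to a drop.
+ */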
+ switch (verdict) {
+ case XDP_TX:
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ if (unlikely(!xdpf)) {
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+ verdict = ENA_XDP_DROP;
+ break;
+ }
+
+ /* Find xmit queue */
+ xdp_ring = rx_ring->xdp_ring;
+
+ /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
+ spin_lock(&xdp_ring->xdp_tx_lock);
+
+ if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf,
+ XDP_XMIT_FLUSH))
+ xdp_return_frame(xdpf);
+
+ spin_unlock(&xdp_ring->xdp_tx_lock);
+ xdp_stat = &rx_ring->rx_stats.xdp_tx;
+ verdict = ENA_XDP_TX;
+ break;
+ case XDP_REDIRECT:
+ if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) {
+ xdp_stat = &rx_ring->rx_stats.xdp_redirect;
+ verdict = ENA_XDP_REDIRECT;
+ break;
+ }
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+ verdict = ENA_XDP_DROP;
+ break;
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+ verdict = ENA_XDP_DROP;
+ break;
+ case XDP_DROP:
+ xdp_stat = &rx_ring->rx_stats.xdp_drop;
+ verdict = ENA_XDP_DROP;
+ break;
+ case XDP_PASS:
+ xdp_stat = &rx_ring->rx_stats.xdp_pass;
+ verdict = ENA_XDP_PASS;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_invalid;
+ verdict = ENA_XDP_DROP;
+ }
+
+ ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
+out:
+ return verdict;
+}
+
+static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
+{
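+ /* XDP TX rings live in tx_ring[] right after the regular IO rings,
+ * one per Rx queue: indices num_io_queues .. 2 * num_io_queues - 1.
+ */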
+ adapter->xdp_first_ring = adapter->num_io_queues;
+ adapter->xdp_num_queues = adapter->num_io_queues;
+
+ ena_init_io_rings(adapter,
+ adapter->xdp_first_ring,
+ adapter->xdp_num_queues);
+}
+
+static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
+{
+ u32 xdp_first_ring = adapter->xdp_first_ring;
+ u32 xdp_num_queues = adapter->xdp_num_queues;
+ int rc = 0;
+
+ rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
+ if (rc)
+ goto setup_err;
+
+ rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues);
+ if (rc)
+ goto create_err;
+
+ return 0;
+
+create_err:
+ ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
+setup_err:
+ return rc;
+}
+
+/* Provides a way for both the kernel and the BPF program to learn
+ * more about the RX queue a given XDP frame arrived on.
+ */
+static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
+{
+ int rc;
+
+ rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0);
+
+ if (rc) {
+ netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
+ "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
+ rx_ring->qid, rc);
+ goto err;
+ }
+
+ rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
+ NULL);
+
+ if (rc) {
+ netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
+ "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
+ rx_ring->qid, rc);
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ }
+
+err:
+ return rc;
+}
+
+static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
+{
+ xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+}
+
+static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
+ struct bpf_prog *prog,
+ int first, int count)
+{
+ struct bpf_prog *old_bpf_prog;
+ struct ena_ring *rx_ring;
+ int i = 0;
+
+ for (i = first; i < count; i++) {
+ rx_ring = &adapter->rx_ring[i];
+ old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog);
+
+ if (!old_bpf_prog && prog) {
+ ena_xdp_register_rxq_info(rx_ring);
+ rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
+ } else if (old_bpf_prog && !prog) {
+ ena_xdp_unregister_rxq_info(rx_ring);
+ rx_ring->rx_headroom = NET_SKB_PAD;
+ }
+ }
+}
+
+static void ena_xdp_exchange_program(struct ena_adapter *adapter,
+ struct bpf_prog *prog)
+{
+ struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
+
+ ena_xdp_exchange_program_rx_in_range(adapter,
+ prog,
+ 0,
+ adapter->num_io_queues);
+
+ if (old_bpf_prog)
+ bpf_prog_put(old_bpf_prog);
+}
+
+static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
+{
+ bool was_up;
+ int rc;
+
+ was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+ if (was_up)
+ ena_down(adapter);
+
+ adapter->xdp_first_ring = 0;
+ adapter->xdp_num_queues = 0;
+ ena_xdp_exchange_program(adapter, NULL);
+ if (was_up) {
+ rc = ena_up(adapter);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ struct bpf_prog *prog = bpf->prog;
+ struct bpf_prog *old_bpf_prog;
+ int rc, prev_mtu;
+ bool is_up;
+
+ is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+ rc = ena_xdp_allowed(adapter);
+ if (rc == ENA_XDP_ALLOWED) {
+ old_bpf_prog = adapter->xdp_bpf_prog;
+ if (prog) {
+ if (!is_up) {
+ ena_init_all_xdp_queues(adapter);
+ } else if (!old_bpf_prog) {
+ ena_down(adapter);
+ ena_init_all_xdp_queues(adapter);
+ }
+ ena_xdp_exchange_program(adapter, prog);
+
+ if (is_up && !old_bpf_prog) {
+ rc = ena_up(adapter);
+ if (rc)
+ return rc;
+ }
+ xdp_features_set_redirect_target(netdev, false);
+ } else if (old_bpf_prog) {
+ xdp_features_clear_redirect_target(netdev);
+ rc = ena_destroy_and_free_all_xdp_queues(adapter);
+ if (rc)
+ return rc;
+ }
+
+ prev_mtu = netdev->max_mtu;
+ netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
+
+ if (!old_bpf_prog)
+ netif_info(adapter, drv, adapter->netdev,
+ "XDP program is set, changing the max_mtu from %d to %d",
+ prev_mtu, netdev->max_mtu);
+
+ } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
+ netdev->mtu, ENA_XDP_MAX_MTU);
+ NL_SET_ERR_MSG_MOD(bpf->extack,
+ "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
+ return -EINVAL;
+ } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
+ adapter->num_io_queues, adapter->max_num_io_queues);
+ NL_SET_ERR_MSG_MOD(bpf->extack,
+ "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
+ * program as well as to query the current xdp program id.
+ */
+static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ return ena_xdp_set(netdev, bpf);
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
+{
+#ifdef CONFIG_RFS_ACCEL
+ u32 i;
+ int rc;
+
+ adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
+ if (!adapter->netdev->rx_cpu_rmap)
+ return -ENOMEM;
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ int irq_idx = ENA_IO_IRQ_IDX(i);
+
+ rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
+ pci_irq_vector(adapter->pdev, irq_idx));
+ if (rc) {
+ free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
+ adapter->netdev->rx_cpu_rmap = NULL;
+ return rc;
+ }
+ }
+#endif /* CONFIG_RFS_ACCEL */
+ return 0;
+}
+
+static void ena_init_io_rings_common(struct ena_adapter *adapter,
+ struct ena_ring *ring, u16 qid)
+{
+ ring->qid = qid;
+ ring->pdev = adapter->pdev;
+ ring->dev = &adapter->pdev->dev;
+ ring->netdev = adapter->netdev;
+ ring->napi = &adapter->ena_napi[qid].napi;
+ ring->adapter = adapter;
+ ring->ena_dev = adapter->ena_dev;
+ ring->per_napi_packets = 0;
+ ring->cpu = 0;
+ ring->numa_node = 0;
+ ring->no_interrupt_event_cnt = 0;
+ u64_stats_init(&ring->syncp);
+}
+
+static void ena_init_io_rings(struct ena_adapter *adapter,
+ int first_index, int count)
+{
+ struct ena_com_dev *ena_dev;
+ struct ena_ring *txr, *rxr;
+ int i;
+
+ ena_dev = adapter->ena_dev;
+
+ for (i = first_index; i < first_index + count; i++) {
+ txr = &adapter->tx_ring[i];
+ rxr = &adapter->rx_ring[i];
+
+ /* TX common ring state */
+ ena_init_io_rings_common(adapter, txr, i);
+
+ /* TX specific ring state */
+ txr->ring_size = adapter->requested_tx_ring_size;
+ txr->tx_max_header_size = ena_dev->tx_max_header_size;
+ txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
+ txr->sgl_size = adapter->max_tx_sgl_size;
+ txr->smoothed_interval =
+ ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
+ txr->disable_meta_caching = adapter->disable_meta_caching;
+ spin_lock_init(&txr->xdp_tx_lock);
+
+ /* Don't init RX queues for xdp queues */
+ if (!ENA_IS_XDP_INDEX(adapter, i)) {
+ /* RX common ring state */
+ ena_init_io_rings_common(adapter, rxr, i);
+
+ /* RX specific ring state */
+ rxr->ring_size = adapter->requested_rx_ring_size;
+ rxr->rx_copybreak = adapter->rx_copybreak;
+ rxr->sgl_size = adapter->max_rx_sgl_size;
+ rxr->smoothed_interval =
+ ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+ rxr->empty_rx_queue = 0;
+ rxr->rx_headroom = NET_SKB_PAD;
+ adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ rxr->xdp_ring = &adapter->tx_ring[i + adapter->num_io_queues];
+ }
+ }
+}
+
+/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
+{
+ struct ena_ring *tx_ring = &adapter->tx_ring[qid];
+ struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
+ int size, i, node;
+
+ if (tx_ring->tx_buffer_info) {
+ netif_err(adapter, ifup,
+ adapter->netdev, "tx_buffer_info info is not NULL");
+ return -EEXIST;
+ }
+
+ size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
+ node = cpu_to_node(ena_irq->cpu);
+
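+ /* Prefer allocating on the NUMA node of the queue's IRQ; if that
+ * fails, fall back to an allocation on any node.
+ */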
+ tx_ring->tx_buffer_info = vzalloc_node(size, node);
+ if (!tx_ring->tx_buffer_info) {
+ tx_ring->tx_buffer_info = vzalloc(size);
+ if (!tx_ring->tx_buffer_info)
+ goto err_tx_buffer_info;
+ }
+
+ size = sizeof(u16) * tx_ring->ring_size;
+ tx_ring->free_ids = vzalloc_node(size, node);
+ if (!tx_ring->free_ids) {
+ tx_ring->free_ids = vzalloc(size);
+ if (!tx_ring->free_ids)
+ goto err_tx_free_ids;
+ }
+
+ size = tx_ring->tx_max_header_size;
+ tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
+ if (!tx_ring->push_buf_intermediate_buf) {
+ tx_ring->push_buf_intermediate_buf = vzalloc(size);
+ if (!tx_ring->push_buf_intermediate_buf)
+ goto err_push_buf_intermediate_buf;
+ }
+
+ /* Req id ring for TX out of order completions */
+ for (i = 0; i < tx_ring->ring_size; i++)
+ tx_ring->free_ids[i] = i;
+
+ /* Reset tx statistics */
+ memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
+
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+ tx_ring->cpu = ena_irq->cpu;
+ tx_ring->numa_node = node;
+ return 0;
+
+err_push_buf_intermediate_buf:
+ vfree(tx_ring->free_ids);
+ tx_ring->free_ids = NULL;
+err_tx_free_ids:
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+err_tx_buffer_info:
+ return -ENOMEM;
+}
+
+/* ena_free_tx_resources - Free I/O Tx Resources per Queue
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Free all transmit software resources
+ */
+static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
+{
+ struct ena_ring *tx_ring = &adapter->tx_ring[qid];
+
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+
+ vfree(tx_ring->free_ids);
+ tx_ring->free_ids = NULL;
+
+ vfree(tx_ring->push_buf_intermediate_buf);
+ tx_ring->push_buf_intermediate_buf = NULL;
+}
+
+static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count)
+{
+ int i, rc = 0;
+
+ for (i = first_index; i < first_index + count; i++) {
+ rc = ena_setup_tx_resources(adapter, i);
+ if (rc)
+ goto err_setup_tx;
+ }
+
+ return 0;
+
+err_setup_tx:
+
+ netif_err(adapter, ifup, adapter->netdev,
+ "Tx queue %d: allocation failed\n", i);
+
+ /* rewind the index freeing the rings as we go */
+ while (first_index < i--)
+ ena_free_tx_resources(adapter, i);
+ return rc;
+}
+
+static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index, int count)
+{
+ int i;
+
+ for (i = first_index; i < first_index + count; i++)
+ ena_free_tx_resources(adapter, i);
+}
+
+/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ */
+static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
+{
+ ena_free_all_io_tx_resources_in_range(adapter,
+ 0,
+ adapter->xdp_num_queues +
+ adapter->num_io_queues);
+}
+
+/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ena_setup_rx_resources(struct ena_adapter *adapter,
+ u32 qid)
+{
+ struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+ struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
+ int size, node, i;
+
+ if (rx_ring->rx_buffer_info) {
+ netif_err(adapter, ifup, adapter->netdev,
+ "rx_buffer_info is not NULL");
+ return -EEXIST;
+ }
+
+ /* Allocate an extra element so the Rx path can always
+ * prefetch rx_info + 1
+ */
+ size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
+ node = cpu_to_node(ena_irq->cpu);
+
+ rx_ring->rx_buffer_info = vzalloc_node(size, node);
+ if (!rx_ring->rx_buffer_info) {
+ rx_ring->rx_buffer_info = vzalloc(size);
+ if (!rx_ring->rx_buffer_info)
+ return -ENOMEM;
+ }
+
+ size = sizeof(u16) * rx_ring->ring_size;
+ rx_ring->free_ids = vzalloc_node(size, node);
+ if (!rx_ring->free_ids) {
+ rx_ring->free_ids = vzalloc(size);
+ if (!rx_ring->free_ids) {
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+ return -ENOMEM;
+ }
+ }
+
+ /* Req id ring for receiving RX pkts out of order */
+ for (i = 0; i < rx_ring->ring_size; i++)
+ rx_ring->free_ids[i] = i;
+
+ /* Reset rx statistics */
+ memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
+
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+ rx_ring->cpu = ena_irq->cpu;
+ rx_ring->numa_node = node;
+
+ return 0;
+}
+
+/* ena_free_rx_resources - Free I/O Rx Resources
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Free all receive software resources
+ */
+static void ena_free_rx_resources(struct ena_adapter *adapter,
+ u32 qid)
+{
+ struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+
+ vfree(rx_ring->free_ids);
+ rx_ring->free_ids = NULL;
+}
+
+/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
+{
+ int i, rc = 0;
+
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ rc = ena_setup_rx_resources(adapter, i);
+ if (rc)
+ goto err_setup_rx;
+ }
+
+ return 0;
+
+err_setup_rx:
+
+ netif_err(adapter, ifup, adapter->netdev,
+ "Rx queue %d: allocation failed\n", i);
+
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ ena_free_rx_resources(adapter, i);
+ return rc;
+}
+
+/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ */
+static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_io_queues; i++)
+ ena_free_rx_resources(adapter, i);
+}
+
+static struct page *ena_alloc_map_page(struct ena_ring *rx_ring,
+ dma_addr_t *dma)
+{
+ struct page *page;
+
+ /* This allocates the page on the NUMA node the executing code
+ * is running on.
+ */
+ page = dev_alloc_page();
+ if (!page) {
+ ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1,
+ &rx_ring->syncp);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ /* To enable NIC-side port-mirroring, a.k.a. a SPAN port, we
+ * make the buffer readable by the NIC as well
+ */
+ *dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (unlikely(dma_mapping_error(rx_ring->dev, *dma))) {
+ ena_increase_stat(&rx_ring->rx_stats.dma_mapping_err, 1,
+ &rx_ring->syncp);
+ __free_page(page);
+ return ERR_PTR(-EIO);
+ }
+
+ return page;
+}
+
+static int ena_alloc_rx_buffer(struct ena_ring *rx_ring,
+ struct ena_rx_buffer *rx_info)
+{
+ int headroom = rx_ring->rx_headroom;
+ struct ena_com_buf *ena_buf;
+ struct page *page;
+ dma_addr_t dma;
+ int tailroom;
+
+ /* restore page offset value in case it has been changed by device */
+ rx_info->buf_offset = headroom;
+
+ /* if the previously allocated page has not been used yet */
+ if (unlikely(rx_info->page))
+ return 0;
+
+ /* We handle DMA here */
+ page = ena_alloc_map_page(rx_ring, &dma);
+ if (unlikely(IS_ERR(page)))
+ return PTR_ERR(page);
+
+ netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+ "Allocate page %p, rx_info %p\n", page, rx_info);
+
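+ /* Reserve tailroom for struct skb_shared_info so the buffer can
+ * later be handed to napi_build_skb() without copying.
+ */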
+ tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ rx_info->page = page;
+ rx_info->dma_addr = dma;
+ rx_info->page_offset = 0;
+ ena_buf = &rx_info->ena_buf;
+ ena_buf->paddr = dma + headroom;
+ ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom;
+
+ return 0;
+}
+
+static void ena_unmap_rx_buff_attrs(struct ena_ring *rx_ring,
+ struct ena_rx_buffer *rx_info,
+ unsigned long attrs)
+{
+ dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE,
+ DMA_BIDIRECTIONAL, attrs);
+}
+
+static void ena_free_rx_page(struct ena_ring *rx_ring,
+ struct ena_rx_buffer *rx_info)
+{
+ struct page *page = rx_info->page;
+
+ if (unlikely(!page)) {
+ netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
+ "Trying to free unallocated buffer\n");
+ return;
+ }
+
+ ena_unmap_rx_buff_attrs(rx_ring, rx_info, 0);
+
+ __free_page(page);
+ rx_info->page = NULL;
+}
+
+static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
+{
+ u16 next_to_use, req_id;
+ u32 i;
+ int rc;
+
+ next_to_use = rx_ring->next_to_use;
+
+ for (i = 0; i < num; i++) {
+ struct ena_rx_buffer *rx_info;
+
+ req_id = rx_ring->free_ids[next_to_use];
+
+ rx_info = &rx_ring->rx_buffer_info[req_id];
+
+ rc = ena_alloc_rx_buffer(rx_ring, rx_info);
+ if (unlikely(rc < 0)) {
+ netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
+ "Failed to allocate buffer for rx queue %d\n",
+ rx_ring->qid);
+ break;
+ }
+ rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
+ &rx_info->ena_buf,
+ req_id);
+ if (unlikely(rc)) {
+ netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
+ "Failed to add buffer for rx queue %d\n",
+ rx_ring->qid);
+ break;
+ }
+ next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
+ rx_ring->ring_size);
+ }
+
+ if (unlikely(i < num)) {
+ ena_increase_stat(&rx_ring->rx_stats.refil_partial, 1,
+ &rx_ring->syncp);
+ netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
+ "Refilled rx qid %d with only %d buffers (from %d)\n",
+ rx_ring->qid, i, num);
+ }
+
+ /* ena_com_write_sq_doorbell issues a wmb() */
+ if (likely(i))
+ ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
+
+ rx_ring->next_to_use = next_to_use;
+
+ return i;
+}
+
+static void ena_free_rx_bufs(struct ena_adapter *adapter,
+ u32 qid)
+{
+ struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+ u32 i;
+
+ for (i = 0; i < rx_ring->ring_size; i++) {
+ struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
+
+ if (rx_info->page)
+ ena_free_rx_page(rx_ring, rx_info);
+ }
+}
+
+/* ena_refill_all_rx_bufs - allocate Rx buffers for all queues
+ * @adapter: board private structure
+ */
+static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
+{
+ struct ena_ring *rx_ring;
+ int i, rc, bufs_num;
+
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ rx_ring = &adapter->rx_ring[i];
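+ /* Fill all but one descriptor: leaving one slot empty is the
+ * usual way to distinguish a full ring from an empty one.
+ */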
+ bufs_num = rx_ring->ring_size - 1;
+ rc = ena_refill_rx_bufs(rx_ring, bufs_num);
+
+ if (unlikely(rc != bufs_num))
+ netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
+ "Refilling Queue %d failed. allocated %d buffers from: %d\n",
+ i, rc, bufs_num);
+ }
+}
+
+static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_io_queues; i++)
+ ena_free_rx_bufs(adapter, i);
+}
+
+static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info)
+{
+ struct ena_com_buf *ena_buf;
+ u32 cnt;
+ int i;
+
+ ena_buf = tx_info->bufs;
+ cnt = tx_info->num_of_bufs;
+
+ if (unlikely(!cnt))
+ return;
+
+ if (tx_info->map_linear_data) {
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(ena_buf, paddr),
+ dma_unmap_len(ena_buf, len),
+ DMA_TO_DEVICE);
+ ena_buf++;
+ cnt--;
+ }
+
+ /* unmap remaining mapped pages */
+ for (i = 0; i < cnt; i++) {
+ dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
+ dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
+ ena_buf++;
+ }
+}
+
+/* ena_free_tx_bufs - Free Tx Buffers per Queue
+ * @tx_ring: TX ring for which buffers are to be freed
+ */
+static void ena_free_tx_bufs(struct ena_ring *tx_ring)
+{
+ bool print_once = true;
+ u32 i;
+
+ for (i = 0; i < tx_ring->ring_size; i++) {
+ struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
+
+ if (!tx_info->skb)
+ continue;
+
+ if (print_once) {
+ netif_notice(tx_ring->adapter, ifdown, tx_ring->netdev,
+ "Free uncompleted tx skb qid %d idx 0x%x\n",
+ tx_ring->qid, i);
+ print_once = false;
+ } else {
+ netif_dbg(tx_ring->adapter, ifdown, tx_ring->netdev,
+ "Free uncompleted tx skb qid %d idx 0x%x\n",
+ tx_ring->qid, i);
+ }
+
+ ena_unmap_tx_buff(tx_ring, tx_info);
+
+ dev_kfree_skb_any(tx_info->skb);
+ }
+ netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->qid));
+}
+
+static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
+{
+ struct ena_ring *tx_ring;
+ int i;
+
+ for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
+ tx_ring = &adapter->tx_ring[i];
+ ena_free_tx_bufs(tx_ring);
+ }
+}
+
+static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
+{
+ u16 ena_qid;
+ int i;
+
+ for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
+ ena_qid = ENA_IO_TXQ_IDX(i);
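+ /* In LLQ (device placement) mode the first bytes of the frame are
+ * pushed directly to device memory, so only the remainder of the
+ * buffer needs a DMA mapping.
+ */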
+ ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
+ }
+}
+
+static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
+{
+ u16 ena_qid;
+ int i;
+
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ ena_qid = ENA_IO_RXQ_IDX(i);
+ cancel_work_sync(&adapter->ena_napi[i].dim.work);
+ ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
+ }
+}
+
+static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
+{
+ ena_destroy_all_tx_queues(adapter);
+ ena_destroy_all_rx_queues(adapter);
+}
+
+static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
+ struct ena_tx_buffer *tx_info, bool is_xdp)
+{
+ if (tx_info)
+ netif_err(ring->adapter,
+ tx_done,
+ ring->netdev,
+ "tx_info doesn't have valid %s. qid %u req_id %u",
+ is_xdp ? "xdp frame" : "skb", ring->qid, req_id);
+ else
+ netif_err(ring->adapter,
+ tx_done,
+ ring->netdev,
+ "Invalid req_id %u in qid %u\n",
+ req_id, ring->qid);
+
+ ena_increase_stat(&ring->tx_stats.bad_req_id, 1, &ring->syncp);
+ ena_reset_device(ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
+
+ return -EFAULT;
+}
+
+static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
+{
+ struct ena_tx_buffer *tx_info;
+
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+ if (likely(tx_info->skb))
+ return 0;
+
+ return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
+}
+
+static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
+{
+ struct ena_tx_buffer *tx_info;
+
+ tx_info = &xdp_ring->tx_buffer_info[req_id];
+ if (likely(tx_info->xdpf))
+ return 0;
+
+ return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
+}
+
+static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
+{
+ struct netdev_queue *txq;
+ bool above_thresh;
+ u32 tx_bytes = 0;
+ u32 total_done = 0;
+ u16 next_to_clean;
+ u16 req_id;
+ int tx_pkts = 0;
+ int rc;
+
+ next_to_clean = tx_ring->next_to_clean;
+ txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
+
+ while (tx_pkts < budget) {
+ struct ena_tx_buffer *tx_info;
+ struct sk_buff *skb;
+
+ rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
+ &req_id);
+ if (rc) {
+ if (unlikely(rc == -EINVAL))
+ handle_invalid_req_id(tx_ring, req_id, NULL,
+ false);
+ break;
+ }
+
+ /* validate that the request id points to a valid skb */
+ rc = validate_tx_req_id(tx_ring, req_id);
+ if (rc)
+ break;
+
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+ skb = tx_info->skb;
+
+ /* prefetch skb_end_pointer() to speed up skb_shinfo(skb) */
+ prefetch(&skb->end);
+
+ tx_info->skb = NULL;
+ tx_info->last_jiffies = 0;
+
+ ena_unmap_tx_buff(tx_ring, tx_info);
+
+ netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+ "tx_poll: q %d skb %p completed\n", tx_ring->qid,
+ skb);
+
+ tx_bytes += skb->len;
+ dev_kfree_skb(skb);
+ tx_pkts++;
+ total_done += tx_info->tx_descs;
+
+ tx_ring->free_ids[next_to_clean] = req_id;
+ next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+ tx_ring->ring_size);
+ }
+
+ tx_ring->next_to_clean = next_to_clean;
+ ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
+ ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
+
+ netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
+
+ netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+ "tx_poll: q %d done. total pkts: %d\n",
+ tx_ring->qid, tx_pkts);
+
+ /* Need to make the ring's circular update visible to
+ * ena_start_xmit() before checking for netif_queue_stopped().
+ */
+ smp_mb();
+
+ above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ ENA_TX_WAKEUP_THRESH);
+ if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
+ __netif_tx_lock(txq, smp_processor_id());
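+ /* Recheck under the tx lock: ena_start_xmit() may have stopped
+ * the queue again after the lockless check above.
+ */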
+ above_thresh =
+ ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ ENA_TX_WAKEUP_THRESH);
+ if (netif_tx_queue_stopped(txq) && above_thresh &&
+ test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
+ netif_tx_wake_queue(txq);
+ ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
+ &tx_ring->syncp);
+ }
+ __netif_tx_unlock(txq);
+ }
+
+ return tx_pkts;
+}
+
+static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag, u16 len)
+{
+ struct sk_buff *skb;
+
+ if (!first_frag)
+ skb = napi_alloc_skb(rx_ring->napi, len);
+ else
+ skb = napi_build_skb(first_frag, len);
+
+ if (unlikely(!skb)) {
+ ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1,
+ &rx_ring->syncp);
+
+ netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
+ "Failed to allocate skb. first_frag %s\n",
+ first_frag ? "provided" : "not provided");
+ }
+
+ return skb;
+}
+
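+/* Try to reserve the remainder of the current Rx page for another packet.
+ * The reuse bumps the page refcount and advances page_offset/paddr past
+ * the consumed buf_len, so the tail of the page can back the next
+ * descriptor. Illustrative numbers only (not taken from the driver): with
+ * a 4096-byte page, a 1000-byte packet and buf_len rounded up to 1280,
+ * the remaining ~2816 bytes can hold another small packet plus its
+ * headroom and tailroom, so the page is reused instead of being unmapped
+ * and freed.
+ */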
+static bool ena_try_rx_buf_page_reuse(struct ena_rx_buffer *rx_info, u16 buf_len,
+ u16 len, int pkt_offset)
+{
+ struct ena_com_buf *ena_buf = &rx_info->ena_buf;
+
+ /* More than ENA_MIN_RX_BUF_SIZE left in the reused buffer
+ * for data + headroom + tailroom.
+ */
+ if (SKB_DATA_ALIGN(len + pkt_offset) + ENA_MIN_RX_BUF_SIZE <= ena_buf->len) {
+ page_ref_inc(rx_info->page);
+ rx_info->page_offset += buf_len;
+ ena_buf->paddr += buf_len;
+ ena_buf->len -= buf_len;
+ return true;
+ }
+
+ return false;
+}
+
+static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
+ struct ena_com_rx_buf_info *ena_bufs,
+ u32 descs,
+ u16 *next_to_clean)
+{
+ int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ bool is_xdp_loaded = ena_xdp_present_ring(rx_ring);
+ struct ena_rx_buffer *rx_info;
+ struct ena_adapter *adapter;
+ int page_offset, pkt_offset;
+ dma_addr_t pre_reuse_paddr;
+ u16 len, req_id, buf = 0;
+ bool reuse_rx_buf_page;
+ struct sk_buff *skb;
+ void *buf_addr;
+ int buf_offset;
+ u16 buf_len;
+
+ len = ena_bufs[buf].len;
+ req_id = ena_bufs[buf].req_id;
+
+ rx_info = &rx_ring->rx_buffer_info[req_id];
+
+ if (unlikely(!rx_info->page)) {
+ adapter = rx_ring->adapter;
+ netif_err(adapter, rx_err, rx_ring->netdev,
+ "Page is NULL. qid %u req_id %u\n", rx_ring->qid, req_id);
+ ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp);
+ ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
+ return NULL;
+ }
+
+ netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+ "rx_info %p page %p\n",
+ rx_info, rx_info->page);
+
+ buf_offset = rx_info->buf_offset;
+ pkt_offset = buf_offset - rx_ring->rx_headroom;
+ page_offset = rx_info->page_offset;
+ buf_addr = page_address(rx_info->page) + page_offset;
+
+ if (len <= rx_ring->rx_copybreak) {
+ skb = ena_alloc_skb(rx_ring, NULL, len);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_copy_to_linear_data(skb, buf_addr + buf_offset, len);
+ dma_sync_single_for_device(rx_ring->dev,
+ dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
+ len,
+ DMA_FROM_DEVICE);
+
+ skb_put(skb, len);
+ netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+ "RX allocated small packet. len %d.\n", skb->len);
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+ rx_ring->free_ids[*next_to_clean] = req_id;
+ *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
+ rx_ring->ring_size);
+ return skb;
+ }
+
+ buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom);
+
+ /* If XDP isn't loaded try to reuse part of the RX buffer */
+ reuse_rx_buf_page = !is_xdp_loaded &&
+ ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset);
+
+ if (!reuse_rx_buf_page)
+ ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC);
+
+ skb = ena_alloc_skb(rx_ring, buf_addr, buf_len);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* Populate skb's linear part */
+ skb_reserve(skb, buf_offset);
+ skb_put(skb, len);
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+ do {
+ netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+ "RX skb updated. len %d. data_len %d\n",
+ skb->len, skb->data_len);
+
+ if (!reuse_rx_buf_page)
+ rx_info->page = NULL;
+
+ rx_ring->free_ids[*next_to_clean] = req_id;
+ *next_to_clean =
+ ENA_RX_RING_IDX_NEXT(*next_to_clean,
+ rx_ring->ring_size);
+ if (likely(--descs == 0))
+ break;
+
+ buf++;
+ len = ena_bufs[buf].len;
+ req_id = ena_bufs[buf].req_id;
+
+ rx_info = &rx_ring->rx_buffer_info[req_id];
+
+ /* rx_info->buf_offset includes rx_ring->rx_headroom */
+ buf_offset = rx_info->buf_offset;
+ pkt_offset = buf_offset - rx_ring->rx_headroom;
+ buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom);
+ page_offset = rx_info->page_offset;
+
+ pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr);
+
+ reuse_rx_buf_page = !is_xdp_loaded &&
+ ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset);
+
+ dma_sync_single_for_cpu(rx_ring->dev,
+ pre_reuse_paddr + pkt_offset,
+ len,
+ DMA_FROM_DEVICE);
+
+ if (!reuse_rx_buf_page)
+ ena_unmap_rx_buff_attrs(rx_ring, rx_info,
+ DMA_ATTR_SKIP_CPU_SYNC);
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
+ page_offset + buf_offset, len, buf_len);
+
+ } while (1);
+
+ return skb;
+}
+
+/* ena_rx_checksum - indicate in skb if hw indicated a good cksum
+ * @rx_ring: the ring from which the packet was received
+ * @ena_rx_ctx: received packet context/metadata
+ * @skb: skb currently being received and modified
+ */
+static void ena_rx_checksum(struct ena_ring *rx_ring,
+ struct ena_com_rx_ctx *ena_rx_ctx,
+ struct sk_buff *skb)
+{
+ /* Rx csum disabled */
+ if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
+ skb->ip_summed = CHECKSUM_NONE;
+ return;
+ }
+
+ /* For fragmented packets the checksum isn't valid */
+ if (ena_rx_ctx->frag) {
+ skb->ip_summed = CHECKSUM_NONE;
+ return;
+ }
+
+ /* if IPv4 and the header checksum is bad */
+ if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
+ (ena_rx_ctx->l3_csum_err))) {
+ /* ipv4 checksum error */
+ skb->ip_summed = CHECKSUM_NONE;
+ ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
+ &rx_ring->syncp);
+ netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
+ "RX IPv4 header checksum error\n");
+ return;
+ }
+
+ /* if TCP/UDP */
+ if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
+ (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
+ if (unlikely(ena_rx_ctx->l4_csum_err)) {
+ /* TCP/UDP checksum error */
+ ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
+ &rx_ring->syncp);
+ netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
+ "RX L4 checksum error\n");
+ skb->ip_summed = CHECKSUM_NONE;
+ return;
+ }
+
+ if (likely(ena_rx_ctx->l4_csum_checked)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ ena_increase_stat(&rx_ring->rx_stats.csum_good, 1,
+ &rx_ring->syncp);
+ } else {
+ ena_increase_stat(&rx_ring->rx_stats.csum_unchecked, 1,
+ &rx_ring->syncp);
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ return;
+ }
+}
+
+static void ena_set_rx_hash(struct ena_ring *rx_ring,
+ struct ena_com_rx_ctx *ena_rx_ctx,
+ struct sk_buff *skb)
+{
+ enum pkt_hash_types hash_type;
+
+ if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
+ if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
+ (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
+
+ hash_type = PKT_HASH_TYPE_L4;
+ else
+ hash_type = PKT_HASH_TYPE_NONE;
+
+ /* Override hash type if the packet is fragmented */
+ if (ena_rx_ctx->frag)
+ hash_type = PKT_HASH_TYPE_NONE;
+
+ skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
+ }
+}
+
+static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u16 num_descs)
+{
+ struct ena_rx_buffer *rx_info;
+ int ret;
+
+ /* XDP multi-buffer packets not supported */
+ if (unlikely(num_descs > 1)) {
+ netdev_err_once(rx_ring->adapter->netdev,
+ "xdp: dropped unsupported multi-buffer packets\n");
+ ena_increase_stat(&rx_ring->rx_stats.xdp_drop, 1, &rx_ring->syncp);
+ return ENA_XDP_DROP;
+ }
+
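+ /* Build the xdp_buff around the single Rx buffer; buf_offset already
+ * accounts for the headroom plus any packet offset set by the device.
+ */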
+ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
+ xdp_prepare_buff(xdp, page_address(rx_info->page),
+ rx_info->buf_offset,
+ rx_ring->ena_bufs[0].len, false);
+
+ ret = ena_xdp_execute(rx_ring, xdp);
+
+ /* The xdp program might expand the headers */
+ if (ret == ENA_XDP_PASS) {
+ rx_info->buf_offset = xdp->data - xdp->data_hard_start;
+ rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
+ }
+
+ return ret;
+}
+
+/* ena_clean_rx_irq - Cleanup RX irq
+ * @rx_ring: RX ring to clean
+ * @napi: napi handler
+ * @budget: how many packets driver is allowed to clean
+ *
+ * Returns the number of cleaned buffers.
+ */
+static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+ u32 budget)
+{
+ u16 next_to_clean = rx_ring->next_to_clean;
+ struct ena_com_rx_ctx ena_rx_ctx;
+ struct ena_rx_buffer *rx_info;
+ struct ena_adapter *adapter;
+ u32 res_budget, work_done;
+ int rx_copybreak_pkt = 0;
+ int refill_threshold;
+ struct sk_buff *skb;
+ int refill_required;
+ struct xdp_buff xdp;
+ int xdp_flags = 0;
+ int total_len = 0;
+ int xdp_verdict;
+ u8 pkt_offset;
+ int rc = 0;
+ int i;
+
+ netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+ "%s qid %d\n", __func__, rx_ring->qid);
+ res_budget = budget;
+ xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
+
+ do {
+ xdp_verdict = ENA_XDP_PASS;
+ skb = NULL;
+ ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
+ ena_rx_ctx.max_bufs = rx_ring->sgl_size;
+ ena_rx_ctx.descs = 0;
+ ena_rx_ctx.pkt_offset = 0;
+ rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
+ rx_ring->ena_com_io_sq,
+ &ena_rx_ctx);
+ if (unlikely(rc))
+ goto error;
+
+ if (unlikely(ena_rx_ctx.descs == 0))
+ break;
+
+ /* First descriptor might have an offset set by the device */
+ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
+ pkt_offset = ena_rx_ctx.pkt_offset;
+ rx_info->buf_offset += pkt_offset;
+
+ netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+ "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
+ rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
+ ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
+
+ dma_sync_single_for_cpu(rx_ring->dev,
+ dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
+ rx_ring->ena_bufs[0].len,
+ DMA_FROM_DEVICE);
+
+ if (ena_xdp_present_ring(rx_ring))
+ xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs);
+
+ /* allocate skb and fill it */
+ if (xdp_verdict == ENA_XDP_PASS)
+ skb = ena_rx_skb(rx_ring,
+ rx_ring->ena_bufs,
+ ena_rx_ctx.descs,
+ &next_to_clean);
+
+ if (unlikely(!skb)) {
+ for (i = 0; i < ena_rx_ctx.descs; i++) {
+ int req_id = rx_ring->ena_bufs[i].req_id;
+
+ rx_ring->free_ids[next_to_clean] = req_id;
+ next_to_clean =
+ ENA_RX_RING_IDX_NEXT(next_to_clean,
+ rx_ring->ring_size);
+
+ /* Packet was passed for transmission, unmap it
+ * from the RX side.
+ */
+ if (xdp_verdict & ENA_XDP_FORWARDED) {
+ ena_unmap_rx_buff_attrs(rx_ring,
+ &rx_ring->rx_buffer_info[req_id],
+ DMA_ATTR_SKIP_CPU_SYNC);
+ rx_ring->rx_buffer_info[req_id].page = NULL;
+ }
+ }
+ if (xdp_verdict != ENA_XDP_PASS) {
+ xdp_flags |= xdp_verdict;
+ total_len += ena_rx_ctx.ena_bufs[0].len;
+ res_budget--;
+ continue;
+ }
+ break;
+ }
+
+ ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
+
+ ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
+
+ skb_record_rx_queue(skb, rx_ring->qid);
+
+ if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak)
+ rx_copybreak_pkt++;
+
+ total_len += skb->len;
+
+ napi_gro_receive(napi, skb);
+
+ res_budget--;
+ } while (likely(res_budget));
+
+ work_done = budget - res_budget;
+ rx_ring->per_napi_packets += work_done;
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->rx_stats.bytes += total_len;
+ rx_ring->rx_stats.cnt += work_done;
+ rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
+ u64_stats_update_end(&rx_ring->syncp);
+
+ rx_ring->next_to_clean = next_to_clean;
+
+ refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
+ refill_threshold =
+ min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
+ ENA_RX_REFILL_THRESH_PACKET);
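+ /* Refill in batches: waiting for at least ring_size /
+ * ENA_RX_REFILL_THRESH_DIVIDER free entries (capped by
+ * ENA_RX_REFILL_THRESH_PACKET) amortizes the doorbell cost.
+ * Illustrative numbers only: with a 1024-entry ring and a divider
+ * of 8, the refill kicks in once at least 128 entries are free.
+ */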
+
+ /* Optimization, try to batch new rx buffers */
+ if (refill_required > refill_threshold) {
+ ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
+ ena_refill_rx_bufs(rx_ring, refill_required);
+ }
+
+ if (xdp_flags & ENA_XDP_REDIRECT)
+ xdp_do_flush_map();
+
+ return work_done;
+
+error:
+ if (xdp_flags & ENA_XDP_REDIRECT)
+ xdp_do_flush();
+
+ adapter = netdev_priv(rx_ring->netdev);
+
+ if (rc == -ENOSPC) {
+ ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1,
+ &rx_ring->syncp);
+ ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS);
+ } else {
+ ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1,
+ &rx_ring->syncp);
+ ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
+ }
+ return 0;
+}
+
+static void ena_dim_work(struct work_struct *w)
+{
+ struct dim *dim = container_of(w, struct dim, work);
+ struct dim_cq_moder cur_moder =
+ net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+ struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
+
+ ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
+ dim->state = DIM_START_MEASURE;
+}
+
+static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
+{
+ struct dim_sample dim_sample;
+ struct ena_ring *rx_ring = ena_napi->rx_ring;
+
+ if (!rx_ring->per_napi_packets)
+ return;
+
+ rx_ring->non_empty_napi_events++;
+
+ dim_update_sample(rx_ring->non_empty_napi_events,
+ rx_ring->rx_stats.cnt,
+ rx_ring->rx_stats.bytes,
+ &dim_sample);
+
+ net_dim(&ena_napi->dim, dim_sample);
+
+ rx_ring->per_napi_packets = 0;
+}
+
+static void ena_unmask_interrupt(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring)
+{
+ u32 rx_interval = tx_ring->smoothed_interval;
+ struct ena_eth_io_intr_reg intr_reg;
+
+ /* rx_ring can be NULL for XDP tx queues, which don't have an
+ * accompanying rx_ring pair.
+ */
+ if (rx_ring)
+ rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
+ rx_ring->smoothed_interval :
+ ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
+
+ /* Update intr register: rx intr delay,
+ * tx intr delay and interrupt unmask
+ */
+ ena_com_update_intr_reg(&intr_reg,
+ rx_interval,
+ tx_ring->smoothed_interval,
+ true);
+
+ ena_increase_stat(&tx_ring->tx_stats.unmask_interrupt, 1,
+ &tx_ring->syncp);
+
+ /* It is a shared MSI-X vector.
+ * Both the Tx and Rx CQs hold a pointer to it, so we use one of
+ * them to reach the interrupt register.
+ * The Tx ring is used because rx_ring is NULL for XDP queues.
+ */
+ ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
+}
+
+static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring)
+{
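+ /* get_cpu() disables preemption so the CPU and its NUMA node stay
+ * stable while the rings are updated; every exit path below must
+ * pair it with put_cpu().
+ */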
+ int cpu = get_cpu();
+ int numa_node;
+
+ /* Check only one ring since the 2 rings are running on the same cpu */
+ if (likely(tx_ring->cpu == cpu))
+ goto out;
+
+ tx_ring->cpu = cpu;
+ if (rx_ring)
+ rx_ring->cpu = cpu;
+
+ numa_node = cpu_to_node(cpu);
+
+ if (likely(tx_ring->numa_node == numa_node))
+ goto out;
+
+ put_cpu();
+
+ if (numa_node != NUMA_NO_NODE) {
+ ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
+ tx_ring->numa_node = numa_node;
+ if (rx_ring) {
+ rx_ring->numa_node = numa_node;
+ ena_com_update_numa_node(rx_ring->ena_com_io_cq,
+ numa_node);
+ }
+ }
+
+ return;
+out:
+ put_cpu();
+}
+
+static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
+{
+ u32 total_done = 0;
+ u16 next_to_clean;
+ int tx_pkts = 0;
+ u16 req_id;
+ int rc;
+
+ if (unlikely(!xdp_ring))
+ return 0;
+ next_to_clean = xdp_ring->next_to_clean;
+
+ while (tx_pkts < budget) {
+ struct ena_tx_buffer *tx_info;
+ struct xdp_frame *xdpf;
+
+ rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
+ &req_id);
+ if (rc) {
+ if (unlikely(rc == -EINVAL))
+ handle_invalid_req_id(xdp_ring, req_id, NULL,
+ true);
+ break;
+ }
+
+ /* validate that the request id points to a valid xdp_frame */
+ rc = validate_xdp_req_id(xdp_ring, req_id);
+ if (rc)
+ break;
+
+ tx_info = &xdp_ring->tx_buffer_info[req_id];
+ xdpf = tx_info->xdpf;
+
+ tx_info->xdpf = NULL;
+ tx_info->last_jiffies = 0;
+ ena_unmap_tx_buff(xdp_ring, tx_info);
+
+ netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
+ "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
+ xdpf);
+
+ tx_pkts++;
+ total_done += tx_info->tx_descs;
+
+ xdp_return_frame(xdpf);
+ xdp_ring->free_ids[next_to_clean] = req_id;
+ next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+ xdp_ring->ring_size);
+ }
+
+ xdp_ring->next_to_clean = next_to_clean;
+ ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
+ ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
+
+ netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
+ "tx_poll: q %d done. total pkts: %d\n",
+ xdp_ring->qid, tx_pkts);
+
+ return tx_pkts;
+}
+
+static int ena_io_poll(struct napi_struct *napi, int budget)
+{
+ struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+ struct ena_ring *tx_ring, *rx_ring;
+ int tx_work_done;
+ int rx_work_done = 0;
+ int tx_budget;
+ int napi_comp_call = 0;
+ int ret;
+
+ tx_ring = ena_napi->tx_ring;
+ rx_ring = ena_napi->rx_ring;
+
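+ /* The NAPI budget only bounds Rx work, so derive a separate Tx
+ * cleanup budget proportional to the ring size.
+ */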
+ tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
+ test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
+ napi_complete_done(napi, 0);
+ return 0;
+ }
+
+ tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
+ /* On netpoll the budget is zero and the handler should only clean the
+ * tx completions.
+ */
+ if (likely(budget))
+ rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+
+ /* If the device is about to reset or is down, avoid unmasking
+ * the interrupt and return 0 so NAPI won't reschedule
+ */
+ if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
+ test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
+ napi_complete_done(napi, 0);
+ ret = 0;
+
+ } else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
+ napi_comp_call = 1;
+
+ /* Update the NUMA node and unmask the interrupt only when
+ * scheduled from interrupt context (as opposed to sk_busy_loop)
+ */
+ if (napi_complete_done(napi, rx_work_done) &&
+ READ_ONCE(ena_napi->interrupts_masked)) {
+ smp_rmb(); /* make sure interrupts_masked is read */
+ WRITE_ONCE(ena_napi->interrupts_masked, false);
+ /* We apply adaptive moderation on Rx path only.
+ * Tx uses static interrupt moderation.
+ */
+ if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
+ ena_adjust_adaptive_rx_intr_moderation(ena_napi);
+
+ ena_update_ring_numa_node(tx_ring, rx_ring);
+ ena_unmask_interrupt(tx_ring, rx_ring);
+ }
+
+ ret = rx_work_done;
+ } else {
+ ret = budget;
+ }
+
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->tx_stats.napi_comp += napi_comp_call;
+ tx_ring->tx_stats.tx_poll++;
+ u64_stats_update_end(&tx_ring->syncp);
+
+ tx_ring->tx_stats.last_napi_jiffies = jiffies;
+
+ return ret;
+}
+
+static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
+{
+ struct ena_adapter *adapter = (struct ena_adapter *)data;
+
+ ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
+
+ /* Don't call the aenq handler before probe is done */
+ if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
+ ena_com_aenq_intr_handler(adapter->ena_dev, data);
+
+ return IRQ_HANDLED;
+}
+
+/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
+ * @irq: interrupt number
+ * @data: pointer to a network interface private napi device structure
+ */
+static irqreturn_t ena_intr_msix_io(int irq, void *data)
+{
+ struct ena_napi *ena_napi = data;
+
+ /* Used to check HW health */
+ WRITE_ONCE(ena_napi->first_interrupt, true);
+
+ WRITE_ONCE(ena_napi->interrupts_masked, true);
+ smp_wmb(); /* write interrupts_masked before calling napi */
+
+ napi_schedule_irqoff(&ena_napi->napi);
+
+ return IRQ_HANDLED;
+}
+
+/* Reserve a single MSI-X vector for management (admin + aenq),
+ * plus one vector for each potential io queue.
+ * The number of potential io queues is the minimum of what the device
+ * supports and the number of vCPUs.
+ */
+static int ena_enable_msix(struct ena_adapter *adapter)
+{
+ int msix_vecs, irq_cnt;
+
+ if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
+ netif_err(adapter, probe, adapter->netdev,
+ "Error, MSI-X is already enabled\n");
+ return -EPERM;
+ }
+
+ /* Reserve the maximum number of MSI-X vectors we might need */
+ msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
+ netif_dbg(adapter, probe, adapter->netdev,
+ "Trying to enable MSI-X, vectors %d\n", msix_vecs);
+
+ irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
+ msix_vecs, PCI_IRQ_MSIX);
+
+ if (irq_cnt < 0) {
+ netif_err(adapter, probe, adapter->netdev,
+ "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
+ return -ENOSPC;
+ }
+
+ if (irq_cnt != msix_vecs) {
+ netif_notice(adapter, probe, adapter->netdev,
+ "Enable only %d MSI-X (out of %d), reduce the number of queues\n",
+ irq_cnt, msix_vecs);
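+ /* The first vector is reserved for management (admin + aenq), so
+ * only irq_cnt - ENA_ADMIN_MSIX_VEC vectors are left for io queues.
+ */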
+ adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
+ }
+
+ if (ena_init_rx_cpu_rmap(adapter))
+ netif_warn(adapter, probe, adapter->netdev,
+ "Failed to map IRQs to CPUs\n");
+
+ adapter->msix_vecs = irq_cnt;
+ set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
+
+ return 0;
+}
+
+static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
+{
+ u32 cpu;
+
+ snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
+ ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
+ pci_name(adapter->pdev));
+ adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
+ ena_intr_msix_mgmnt;
+ adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
+ adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
+ pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
+ cpu = cpumask_first(cpu_online_mask);
+ adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
+ cpumask_set_cpu(cpu,
+ &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
+}
+
+static void ena_setup_io_intr(struct ena_adapter *adapter)
+{
+ struct net_device *netdev;
+ int irq_idx, i, cpu;
+ int io_queue_count;
+
+ netdev = adapter->netdev;
+ io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
+
+ for (i = 0; i < io_queue_count; i++) {
+ irq_idx = ENA_IO_IRQ_IDX(i);
+ cpu = i % num_online_cpus();
+
+ snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
+ "%s-Tx-Rx-%d", netdev->name, i);
+ adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
+ adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
+ adapter->irq_tbl[irq_idx].vector =
+ pci_irq_vector(adapter->pdev, irq_idx);
+ adapter->irq_tbl[irq_idx].cpu = cpu;
+
+ cpumask_set_cpu(cpu,
+ &adapter->irq_tbl[irq_idx].affinity_hint_mask);
+ }
+}
+
+static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
+{
+ unsigned long flags = 0;
+ struct ena_irq *irq;
+ int rc;
+
+ irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
+ rc = request_irq(irq->vector, irq->handler, flags, irq->name,
+ irq->data);
+ if (rc) {
+ netif_err(adapter, probe, adapter->netdev,
+ "Failed to request admin irq\n");
+ return rc;
+ }
+
+ netif_dbg(adapter, probe, adapter->netdev,
+ "Set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n",
+ irq->affinity_hint_mask.bits[0], irq->vector);
+
+ irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+
+ return rc;
+}
+
+static int ena_request_io_irq(struct ena_adapter *adapter)
+{
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
+ unsigned long flags = 0;
+ struct ena_irq *irq;
+ int rc = 0, i, k;
+
+ if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
+ netif_err(adapter, ifup, adapter->netdev,
+ "Failed to request I/O IRQ: MSI-X is not enabled\n");
+ return -EINVAL;
+ }
+
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
+ irq = &adapter->irq_tbl[i];
+ rc = request_irq(irq->vector, irq->handler, flags, irq->name,
+ irq->data);
+ if (rc) {
+ netif_err(adapter, ifup, adapter->netdev,
+ "Failed to request I/O IRQ. index %d rc %d\n",
+ i, rc);
+ goto err;
+ }
+
+ netif_dbg(adapter, ifup, adapter->netdev,
+ "Set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
+ i, irq->affinity_hint_mask.bits[0], irq->vector);
+
+ irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+ }
+
+ return rc;
+
+err:
+ for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
+ irq = &adapter->irq_tbl[k];
+ free_irq(irq->vector, irq->data);
+ }
+
+ return rc;
+}
+
+static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
+{
+ struct ena_irq *irq;
+
+ irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
+ synchronize_irq(irq->vector);
+ irq_set_affinity_hint(irq->vector, NULL);
+ free_irq(irq->vector, irq->data);
+}
+
+static void ena_free_io_irq(struct ena_adapter *adapter)
+{
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
+ struct ena_irq *irq;
+ int i;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (adapter->msix_vecs >= 1) {
+ free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
+ adapter->netdev->rx_cpu_rmap = NULL;
+ }
+#endif /* CONFIG_RFS_ACCEL */
+
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
+ irq = &adapter->irq_tbl[i];
+ irq_set_affinity_hint(irq->vector, NULL);
+ free_irq(irq->vector, irq->data);
+ }
+}
+
+static void ena_disable_msix(struct ena_adapter *adapter)
+{
+ if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
+ pci_free_irq_vectors(adapter->pdev);
+}
+
+static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
+{
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
+ int i;
+
+ if (!netif_running(adapter->netdev))
+ return;
+
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
+ synchronize_irq(adapter->irq_tbl[i].vector);
+}
+
+static void ena_del_napi_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count)
+{
+ int i;
+
+ for (i = first_index; i < first_index + count; i++) {
+ netif_napi_del(&adapter->ena_napi[i].napi);
+
+ WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) &&
+ adapter->ena_napi[i].xdp_ring);
+ }
+}
+
+static void ena_init_napi_in_range(struct ena_adapter *adapter,
+ int first_index, int count)
+{
+ int i;
+
+ for (i = first_index; i < first_index + count; i++) {
+ struct ena_napi *napi = &adapter->ena_napi[i];
+
+ netif_napi_add(adapter->netdev, &napi->napi,
+ ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll);
+
+ if (!ENA_IS_XDP_INDEX(adapter, i)) {
+ napi->rx_ring = &adapter->rx_ring[i];
+ napi->tx_ring = &adapter->tx_ring[i];
+ } else {
+ napi->xdp_ring = &adapter->tx_ring[i];
+ }
+ napi->qid = i;
+ }
+}
+
+static void ena_napi_disable_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count)
+{
+ int i;
+
+ for (i = first_index; i < first_index + count; i++)
+ napi_disable(&adapter->ena_napi[i].napi);
+}
+
+static void ena_napi_enable_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count)
+{
+ int i;
+
+ for (i = first_index; i < first_index + count; i++)
+ napi_enable(&adapter->ena_napi[i].napi);
+}
+
+/* Configure the Rx forwarding */
+static int ena_rss_configure(struct ena_adapter *adapter)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ int rc;
+
+ /* In case the RSS table wasn't initialized by probe */
+ if (!ena_dev->rss.tbl_log_size) {
+ rc = ena_rss_init_default(adapter);
+ if (rc && (rc != -EOPNOTSUPP)) {
+ netif_err(adapter, ifup, adapter->netdev,
+ "Failed to init RSS rc: %d\n", rc);
+ return rc;
+ }
+ }
+
+ /* Set indirect table */
+ rc = ena_com_indirect_table_set(ena_dev);
+ if (unlikely(rc && rc != -EOPNOTSUPP))
+ return rc;
+
+ /* Configure hash function (if supported) */
+ rc = ena_com_set_hash_function(ena_dev);
+ if (unlikely(rc && (rc != -EOPNOTSUPP)))
+ return rc;
+
+ /* Configure hash inputs (if supported) */
+ rc = ena_com_set_hash_ctrl(ena_dev);
+ if (unlikely(rc && (rc != -EOPNOTSUPP)))
+ return rc;
+
+ return 0;
+}
+
+static int ena_up_complete(struct ena_adapter *adapter)
+{
+ int rc;
+
+ rc = ena_rss_configure(adapter);
+ if (rc)
+ return rc;
+
+ ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
+
+ ena_refill_all_rx_bufs(adapter);
+
+ /* enable transmits */
+ netif_tx_start_all_queues(adapter->netdev);
+
+ ena_napi_enable_in_range(adapter,
+ 0,
+ adapter->xdp_num_queues + adapter->num_io_queues);
+
+ return 0;
+}
+
+static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
+{
+ struct ena_com_create_io_ctx ctx;
+ struct ena_com_dev *ena_dev;
+ struct ena_ring *tx_ring;
+ u32 msix_vector;
+ u16 ena_qid;
+ int rc;
+
+ ena_dev = adapter->ena_dev;
+
+ tx_ring = &adapter->tx_ring[qid];
+ msix_vector = ENA_IO_IRQ_IDX(qid);
+ ena_qid = ENA_IO_TXQ_IDX(qid);
+
+ memset(&ctx, 0x0, sizeof(ctx));
+
+ ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
+ ctx.qid = ena_qid;
+ ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
+ ctx.msix_vector = msix_vector;
+ ctx.queue_size = tx_ring->ring_size;
+ ctx.numa_node = tx_ring->numa_node;
+
+ rc = ena_com_create_io_queue(ena_dev, &ctx);
+ if (rc) {
+ netif_err(adapter, ifup, adapter->netdev,
+ "Failed to create I/O TX queue num %d rc: %d\n",
+ qid, rc);
+ return rc;
+ }
+
+ rc = ena_com_get_io_handlers(ena_dev, ena_qid,
+ &tx_ring->ena_com_io_sq,
+ &tx_ring->ena_com_io_cq);
+ if (rc) {
+ netif_err(adapter, ifup, adapter->netdev,
+ "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
+ qid, rc);
+ ena_com_destroy_io_queue(ena_dev, ena_qid);
+ return rc;
+ }
+
+ ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
+ return rc;
+}
+
+static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+ int first_index, int count)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ int rc, i;
+
+ for (i = first_index; i < first_index + count; i++) {
+ rc = ena_create_io_tx_queue(adapter, i);
+ if (rc)
+ goto create_err;
+ }
+
+ return 0;
+
+create_err:
+ while (i-- > first_index)
+ ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
+
+ return rc;
+}
+
+static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
+{
+ struct ena_com_dev *ena_dev;
+ struct ena_com_create_io_ctx ctx;
+ struct ena_ring *rx_ring;
+ u32 msix_vector;
+ u16 ena_qid;
+ int rc;
+
+ ena_dev = adapter->ena_dev;
+
+ rx_ring = &adapter->rx_ring[qid];
+ msix_vector = ENA_IO_IRQ_IDX(qid);
+ ena_qid = ENA_IO_RXQ_IDX(qid);
+
+ memset(&ctx, 0x0, sizeof(ctx));
+
+ ctx.qid = ena_qid;
+ ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
+ ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ ctx.msix_vector = msix_vector;
+ ctx.queue_size = rx_ring->ring_size;
+ ctx.numa_node = rx_ring->numa_node;
+
+ rc = ena_com_create_io_queue(ena_dev, &ctx);
+ if (rc) {
+ netif_err(adapter, ifup, adapter->netdev,
+ "Failed to create I/O RX queue num %d rc: %d\n",
+ qid, rc);
+ return rc;
+ }
+
+ rc = ena_com_get_io_handlers(ena_dev, ena_qid,
+ &rx_ring->ena_com_io_sq,
+ &rx_ring->ena_com_io_cq);
+ if (rc) {
+ netif_err(adapter, ifup, adapter->netdev,
+ "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
+ qid, rc);
+ goto err;
+ }
+
+ ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
+
+ return rc;
+err:
+ ena_com_destroy_io_queue(ena_dev, ena_qid);
+ return rc;
+}
+
+static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ int rc, i;
+
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ rc = ena_create_io_rx_queue(adapter, i);
+ if (rc)
+ goto create_err;
+ INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
+ }
+
+ return 0;
+
+create_err:
+ while (i--) {
+ cancel_work_sync(&adapter->ena_napi[i].dim.work);
+ ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
+ }
+
+ return rc;
+}
+
+static void set_io_rings_size(struct ena_adapter *adapter,
+ int new_tx_size,
+ int new_rx_size)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ adapter->tx_ring[i].ring_size = new_tx_size;
+ adapter->rx_ring[i].ring_size = new_rx_size;
+ }
+}
+
+/* This function allows queue allocation to back off when the system is
+ * low on memory. If there is not enough memory to allocate io queues
+ * the driver will try to allocate smaller queues.
+ *
+ * The backoff algorithm is as follows:
+ * 1. Try to allocate TX and RX queues.
+ * 1.1. If successful, return success.
+ *
+ * 2. Halve the size of the larger of the RX and TX queues (or both if they are the same size).
+ *
+ * 3. If TX or RX is smaller than 256:
+ * 3.1. return failure.
+ * 4. else:
+ * 4.1. go back to 1.
+ */
+static int create_queues_with_size_backoff(struct ena_adapter *adapter)
+{
+ int rc, cur_rx_ring_size, cur_tx_ring_size;
+ int new_rx_ring_size, new_tx_ring_size;
+
+ /* current queue sizes might be set to smaller than the requested
+ * ones due to past queue allocation failures.
+ */
+ set_io_rings_size(adapter, adapter->requested_tx_ring_size,
+ adapter->requested_rx_ring_size);
+
+ while (1) {
+ if (ena_xdp_present(adapter)) {
+ rc = ena_setup_and_create_all_xdp_queues(adapter);
+
+ if (rc)
+ goto err_setup_tx;
+ }
+ rc = ena_setup_tx_resources_in_range(adapter,
+ 0,
+ adapter->num_io_queues);
+ if (rc)
+ goto err_setup_tx;
+
+ rc = ena_create_io_tx_queues_in_range(adapter,
+ 0,
+ adapter->num_io_queues);
+ if (rc)
+ goto err_create_tx_queues;
+
+ rc = ena_setup_all_rx_resources(adapter);
+ if (rc)
+ goto err_setup_rx;
+
+ rc = ena_create_all_io_rx_queues(adapter);
+ if (rc)
+ goto err_create_rx_queues;
+
+ return 0;
+
+err_create_rx_queues:
+ ena_free_all_io_rx_resources(adapter);
+err_setup_rx:
+ ena_destroy_all_tx_queues(adapter);
+err_create_tx_queues:
+ ena_free_all_io_tx_resources(adapter);
+err_setup_tx:
+ if (rc != -ENOMEM) {
+ netif_err(adapter, ifup, adapter->netdev,
+ "Queue creation failed with error code %d\n",
+ rc);
+ return rc;
+ }
+
+ cur_tx_ring_size = adapter->tx_ring[0].ring_size;
+ cur_rx_ring_size = adapter->rx_ring[0].ring_size;
+
+ netif_err(adapter, ifup, adapter->netdev,
+ "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
+ cur_tx_ring_size, cur_rx_ring_size);
+
+ new_tx_ring_size = cur_tx_ring_size;
+ new_rx_ring_size = cur_rx_ring_size;
+
+ /* Decrease the size of the larger queue, or
+ * decrease both if they are the same size.
+ */
+ if (cur_rx_ring_size <= cur_tx_ring_size)
+ new_tx_ring_size = cur_tx_ring_size / 2;
+ if (cur_rx_ring_size >= cur_tx_ring_size)
+ new_rx_ring_size = cur_rx_ring_size / 2;
+
+ if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
+ new_rx_ring_size < ENA_MIN_RING_SIZE) {
+ netif_err(adapter, ifup, adapter->netdev,
+ "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
+ ENA_MIN_RING_SIZE);
+ return rc;
+ }
+
+ netif_err(adapter, ifup, adapter->netdev,
+ "Retrying queue creation with sizes TX=%d, RX=%d\n",
+ new_tx_ring_size,
+ new_rx_ring_size);
+
+ set_io_rings_size(adapter, new_tx_ring_size,
+ new_rx_ring_size);
+ }
+}
+
+static int ena_up(struct ena_adapter *adapter)
+{
+ int io_queue_count, rc, i;
+
+ netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
+
+ io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
+ ena_setup_io_intr(adapter);
+
+ /* napi poll functions should be initialized before running
+ * request_irq() to handle a rare condition where a pending
+ * interrupt causes the ISR to fire immediately while the poll
+ * function is not yet set, leading to a NULL dereference
+ */
+ ena_init_napi_in_range(adapter, 0, io_queue_count);
+
+ rc = ena_request_io_irq(adapter);
+ if (rc)
+ goto err_req_irq;
+
+ rc = create_queues_with_size_backoff(adapter);
+ if (rc)
+ goto err_create_queues_with_backoff;
+
+ rc = ena_up_complete(adapter);
+ if (rc)
+ goto err_up;
+
+ if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
+ netif_carrier_on(adapter->netdev);
+
+ ena_increase_stat(&adapter->dev_stats.interface_up, 1,
+ &adapter->syncp);
+
+ set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+ /* Enable completion queues interrupt */
+ for (i = 0; i < adapter->num_io_queues; i++)
+ ena_unmask_interrupt(&adapter->tx_ring[i],
+ &adapter->rx_ring[i]);
+
+ /* schedule napi in case we had pending packets
+ * from the last time we disabled napi
+ */
+ for (i = 0; i < io_queue_count; i++)
+ napi_schedule(&adapter->ena_napi[i].napi);
+
+ return rc;
+
+err_up:
+ ena_destroy_all_tx_queues(adapter);
+ ena_free_all_io_tx_resources(adapter);
+ ena_destroy_all_rx_queues(adapter);
+ ena_free_all_io_rx_resources(adapter);
+err_create_queues_with_backoff:
+ ena_free_io_irq(adapter);
+err_req_irq:
+ ena_del_napi_in_range(adapter, 0, io_queue_count);
+
+ return rc;
+}
+
+static void ena_down(struct ena_adapter *adapter)
+{
+ int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
+
+ netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
+
+ clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+ ena_increase_stat(&adapter->dev_stats.interface_down, 1,
+ &adapter->syncp);
+
+ netif_carrier_off(adapter->netdev);
+ netif_tx_disable(adapter->netdev);
+
+ /* After this point the napi handler won't enable the tx queue */
+ ena_napi_disable_in_range(adapter, 0, io_queue_count);
+
+ /* After the queues are destroyed there won't be any new interrupts */
+
+ if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
+ int rc;
+
+ rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
+ if (rc)
+ netif_err(adapter, ifdown, adapter->netdev,
+ "Device reset failed\n");
+ /* stop submitting admin commands on a device that was reset */
+ ena_com_set_admin_running_state(adapter->ena_dev, false);
+ }
+
+ ena_destroy_all_io_queues(adapter);
+
+ ena_disable_io_intr_sync(adapter);
+ ena_free_io_irq(adapter);
+ ena_del_napi_in_range(adapter, 0, io_queue_count);
+
+ ena_free_all_tx_bufs(adapter);
+ ena_free_all_rx_bufs(adapter);
+ ena_free_all_io_tx_resources(adapter);
+ ena_free_all_io_rx_resources(adapter);
+}
+
+/* ena_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ */
+static int ena_open(struct net_device *netdev)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ int rc;
+
+ /* Notify the stack of the actual queue counts. */
+ rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
+ if (rc) {
+ netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
+ return rc;
+ }
+
+ rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
+ if (rc) {
+ netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
+ return rc;
+ }
+
+ return ena_up(adapter);
+}
+
+/* ena_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS. The hardware is still under the drivers control, but
+ * needs to be disabled. A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ */
+static int ena_close(struct net_device *netdev)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+
+ netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
+
+ if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
+ return 0;
+
+ if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+ ena_down(adapter);
+
+ /* Check the device status and issue a reset if needed */
+ check_for_admin_com_state(adapter);
+ if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ netif_err(adapter, ifdown, adapter->netdev,
+ "Destroy failure, restarting device\n");
+ ena_dump_stats_to_dmesg(adapter);
+ /* rtnl lock already obtained in dev_ioctl() layer */
+ ena_destroy_device(adapter, false);
+ ena_restore_device(adapter);
+ }
+
+ return 0;
+}
+
+int ena_update_queue_params(struct ena_adapter *adapter,
+ u32 new_tx_size,
+ u32 new_rx_size,
+ u32 new_llq_header_len)
+{
+ bool dev_was_up, large_llq_changed = false;
+ int rc = 0;
+
+ dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+ ena_close(adapter->netdev);
+ adapter->requested_tx_ring_size = new_tx_size;
+ adapter->requested_rx_ring_size = new_rx_size;
+ ena_init_io_rings(adapter,
+ 0,
+ adapter->xdp_num_queues +
+ adapter->num_io_queues);
+
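+ /* A large LLQ header change is only relevant when the device is in
+ * LLQ mode and the requested header length differs from the current one.
+ */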
+ large_llq_changed = adapter->ena_dev->tx_mem_queue_type ==
+ ENA_ADMIN_PLACEMENT_POLICY_DEV;
+ large_llq_changed &=
+ new_llq_header_len != adapter->ena_dev->tx_max_header_size;
+
+ /* the caller is responsible for validating the configuration */
+ if (large_llq_changed) {
+ adapter->large_llq_header_enabled = !adapter->large_llq_header_enabled;
+
+ ena_destroy_device(adapter, false);
+ rc = ena_restore_device(adapter);
+ }
+
+ return dev_was_up && !rc ? ena_up(adapter) : rc;
+}
+
+int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak)
+{
+ struct ena_ring *rx_ring;
+ int i;
+
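+ /* The copybreak value is bounded by the largest frame the ring can
+ * hand up: the device MTU, capped at a single page.
+ */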
+ if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE))
+ return -EINVAL;
+
+ adapter->rx_copybreak = rx_copybreak;
+
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ rx_ring = &adapter->rx_ring[i];
+ rx_ring->rx_copybreak = rx_copybreak;
+ }
+
+ return 0;
+}
+
+int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ int prev_channel_count;
+ bool dev_was_up;
+
+ dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+ ena_close(adapter->netdev);
+ prev_channel_count = adapter->num_io_queues;
+ adapter->num_io_queues = new_channel_count;
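+ /* XDP Tx rings are placed right after the regular IO rings, so the
+ * first XDP ring index always equals the new channel count.
+ */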
+ if (ena_xdp_present(adapter) &&
+ ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
+ adapter->xdp_first_ring = new_channel_count;
+ adapter->xdp_num_queues = new_channel_count;
+ if (prev_channel_count > new_channel_count)
+ ena_xdp_exchange_program_rx_in_range(adapter,
+ NULL,
+ new_channel_count,
+ prev_channel_count);
+ else
+ ena_xdp_exchange_program_rx_in_range(adapter,
+ adapter->xdp_bpf_prog,
+ prev_channel_count,
+ new_channel_count);
+ }
+
+ /* We need to destroy the rss table so that the indirection
+ * table will be reinitialized by ena_up()
+ */
+ ena_com_rss_destroy(ena_dev);
+ ena_init_io_rings(adapter,
+ 0,
+ adapter->xdp_num_queues +
+ adapter->num_io_queues);
+ return dev_was_up ? ena_open(adapter->netdev) : 0;
+}
+
+static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx,
+ struct sk_buff *skb,
+ bool disable_meta_caching)
+{
+ u32 mss = skb_shinfo(skb)->gso_size;
+ struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
+ u8 l4_protocol = 0;
+
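+ /* Checksum/TSO metadata is only needed when the stack requested
+ * checksum offload (CHECKSUM_PARTIAL) or the packet is a TSO one.
+ */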
+ if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
+ ena_tx_ctx->l4_csum_enable = 1;
+ if (mss) {
+ ena_tx_ctx->tso_enable = 1;
+ ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
+ ena_tx_ctx->l4_csum_partial = 0;
+ } else {
+ ena_tx_ctx->tso_enable = 0;
+ ena_meta->l4_hdr_len = 0;
+ ena_tx_ctx->l4_csum_partial = 1;
+ }
+
+ switch (ip_hdr(skb)->version) {
+ case IPVERSION:
+ ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
+ if (ip_hdr(skb)->frag_off & htons(IP_DF))
+ ena_tx_ctx->df = 1;
+ if (mss)
+ ena_tx_ctx->l3_csum_enable = 1;
+ l4_protocol = ip_hdr(skb)->protocol;
+ break;
+ case 6:
+ ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
+ l4_protocol = ipv6_hdr(skb)->nexthdr;
+ break;
+ default:
+ break;
+ }
+
+ if (l4_protocol == IPPROTO_TCP)
+ ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
+ else
+ ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
+
+ ena_meta->mss = mss;
+ ena_meta->l3_hdr_len = skb_network_header_len(skb);
+ ena_meta->l3_hdr_offset = skb_network_offset(skb);
+ ena_tx_ctx->meta_valid = 1;
+ } else if (disable_meta_caching) {
+ memset(ena_meta, 0, sizeof(*ena_meta));
+ ena_tx_ctx->meta_valid = 1;
+ } else {
+ ena_tx_ctx->meta_valid = 0;
+ }
+}
+
+static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
+ struct sk_buff *skb)
+{
+ int num_frags, header_len, rc;
+
+ num_frags = skb_shinfo(skb)->nr_frags;
+ header_len = skb_headlen(skb);
+
+ if (num_frags < tx_ring->sgl_size)
+ return 0;
+
+ if ((num_frags == tx_ring->sgl_size) &&
+ (header_len < tx_ring->tx_max_header_size))
+ return 0;
+
+ ena_increase_stat(&tx_ring->tx_stats.linearize, 1, &tx_ring->syncp);
+
+ rc = skb_linearize(skb);
+ if (unlikely(rc)) {
+ ena_increase_stat(&tx_ring->tx_stats.linearize_failed, 1,
+ &tx_ring->syncp);
+ }
+
+ return rc;
+}
+
+static int ena_tx_map_skb(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info,
+ struct sk_buff *skb,
+ void **push_hdr,
+ u16 *header_len)
+{
+ struct ena_adapter *adapter = tx_ring->adapter;
+ struct ena_com_buf *ena_buf;
+ dma_addr_t dma;
+ u32 skb_head_len, frag_len, last_frag;
+ u16 push_len = 0;
+ u16 delta = 0;
+ int i = 0;
+
+ skb_head_len = skb_headlen(skb);
+ tx_info->skb = skb;
+ ena_buf = tx_info->bufs;
+
+ if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+ /* When the device is in LLQ mode, the driver will copy
+ * the header into the device memory space.
+ * The ena_com layer assumes the header is in a linear
+ * memory space.
+ * This assumption might be wrong since part of the header
+ * can be in the fragmented buffers.
+ * Use skb_header_pointer to make sure the header is in a
+ * linear memory space.
+ */
+
+ push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
+ *push_hdr = skb_header_pointer(skb, 0, push_len,
+ tx_ring->push_buf_intermediate_buf);
+ *header_len = push_len;
+ if (unlikely(skb->data != *push_hdr)) {
+ ena_increase_stat(&tx_ring->tx_stats.llq_buffer_copy, 1,
+ &tx_ring->syncp);
+
+ delta = push_len - skb_head_len;
+ }
+ } else {
+ *push_hdr = NULL;
+ *header_len = min_t(u32, skb_head_len,
+ tx_ring->tx_max_header_size);
+ }
+
+ netif_dbg(adapter, tx_queued, adapter->netdev,
+ "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
+ *push_hdr, push_len);
+
+ if (skb_head_len > push_len) {
+ dma = dma_map_single(tx_ring->dev, skb->data + push_len,
+ skb_head_len - push_len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
+ goto error_report_dma_error;
+
+ ena_buf->paddr = dma;
+ ena_buf->len = skb_head_len - push_len;
+
+ ena_buf++;
+ tx_info->num_of_bufs++;
+ tx_info->map_linear_data = 1;
+ } else {
+ tx_info->map_linear_data = 0;
+ }
+
+ last_frag = skb_shinfo(skb)->nr_frags;
+
+ for (i = 0; i < last_frag; i++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ frag_len = skb_frag_size(frag);
+
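+ /* 'delta' counts pushed header bytes that live in the frags;
+ * skip over them so they aren't mapped to the device twice.
+ */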
+ if (unlikely(delta >= frag_len)) {
+ delta -= frag_len;
+ continue;
+ }
+
+ dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
+ frag_len - delta, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
+ goto error_report_dma_error;
+
+ ena_buf->paddr = dma;
+ ena_buf->len = frag_len - delta;
+ ena_buf++;
+ tx_info->num_of_bufs++;
+ delta = 0;
+ }
+
+ return 0;
+
+error_report_dma_error:
+ ena_increase_stat(&tx_ring->tx_stats.dma_mapping_err, 1,
+ &tx_ring->syncp);
+ netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map skb\n");
+
+ tx_info->skb = NULL;
+
+ tx_info->num_of_bufs += i;
+ ena_unmap_tx_buff(tx_ring, tx_info);
+
+ return -EINVAL;
+}
+
+/* Called with netif_tx_lock. */
+static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ena_adapter *adapter = netdev_priv(dev);
+ struct ena_tx_buffer *tx_info;
+ struct ena_com_tx_ctx ena_tx_ctx;
+ struct ena_ring *tx_ring;
+ struct netdev_queue *txq;
+ void *push_hdr;
+ u16 next_to_use, req_id, header_len;
+ int qid, rc;
+
+ netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
+ /* Determine which tx ring we will be placed on */
+ qid = skb_get_queue_mapping(skb);
+ tx_ring = &adapter->tx_ring[qid];
+ txq = netdev_get_tx_queue(dev, qid);
+
+ rc = ena_check_and_linearize_skb(tx_ring, skb);
+ if (unlikely(rc))
+ goto error_drop_packet;
+
+ skb_tx_timestamp(skb);
+
+ next_to_use = tx_ring->next_to_use;
+ req_id = tx_ring->free_ids[next_to_use];
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+ tx_info->num_of_bufs = 0;
+
+ WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
+
+ rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
+ if (unlikely(rc))
+ goto error_drop_packet;
+
+ memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
+ ena_tx_ctx.ena_bufs = tx_info->bufs;
+ ena_tx_ctx.push_header = push_hdr;
+ ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
+ ena_tx_ctx.req_id = req_id;
+ ena_tx_ctx.header_len = header_len;
+
+ /* set flags and meta data */
+ ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);
+
+ rc = ena_xmit_common(dev,
+ tx_ring,
+ tx_info,
+ &ena_tx_ctx,
+ next_to_use,
+ skb->len);
+ if (rc)
+ goto error_unmap_dma;
+
+ netdev_tx_sent_queue(txq, skb->len);
+
+ /* stop the queue when no more space is available; the packet can need
+ * up to sgl_size + 2 descriptors: one for the meta descriptor and one
+ * for the header (if the header is larger than tx_max_header_size).
+ */
+ if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ tx_ring->sgl_size + 2))) {
+ netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
+ __func__, qid);
+
+ netif_tx_stop_queue(txq);
+ ena_increase_stat(&tx_ring->tx_stats.queue_stop, 1,
+ &tx_ring->syncp);
+
+ /* There is a rare condition where this function decides to
+ * stop the queue but meanwhile clean_tx_irq updates
+ * next_to_completion and terminates.
+ * The queue will remain stopped forever.
+ * To solve this issue add an smp_mb() to make sure that the
+ * netif_tx_stop_queue() write is visible before checking if
+ * there is additional space in the queue.
+ */
+ smp_mb();
+
+ if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ ENA_TX_WAKEUP_THRESH)) {
+ netif_tx_wake_queue(txq);
+ ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
+ &tx_ring->syncp);
+ }
+ }
+
+ if (netif_xmit_stopped(txq) || !netdev_xmit_more())
+ /* trigger the dma engine. ena_ring_tx_doorbell()
+ * issues a memory barrier internally.
+ */
+ ena_ring_tx_doorbell(tx_ring);
+
+ return NETDEV_TX_OK;
+
+error_unmap_dma:
+ ena_unmap_tx_buff(tx_ring, tx_info);
+ tx_info->skb = NULL;
+
+error_drop_packet:
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ u16 qid;
+ /* we suspect that this is good for in-kernel network services that
+ * want to loop incoming skb rx to tx in normal user generated traffic;
+ * most probably we will not get to this
+ */
+ if (skb_rx_queue_recorded(skb))
+ qid = skb_get_rx_queue(skb);
+ else
+ qid = netdev_pick_tx(dev, skb, NULL);
+
+ return qid;
+}
+
+static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct ena_admin_host_info *host_info;
+ int rc;
+
+ /* Allocate only the host info */
+ rc = ena_com_allocate_host_info(ena_dev);
+ if (rc) {
+ dev_err(dev, "Cannot allocate host info\n");
+ return;
+ }
+
+ host_info = ena_dev->host_attr.host_info;
+
+ host_info->bdf = pci_dev_id(pdev);
+ host_info->os_type = ENA_ADMIN_OS_LINUX;
+ host_info->kernel_ver = LINUX_VERSION_CODE;
+ strscpy(host_info->kernel_ver_str, utsname()->version,
+ sizeof(host_info->kernel_ver_str) - 1);
+ host_info->os_dist = 0;
+ strncpy(host_info->os_dist_str, utsname()->release,
+ sizeof(host_info->os_dist_str) - 1);
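+ /* Encode the driver version as major/minor/sub-minor, with 'K'
+ * (kernel driver) as the module type in the upper bits.
+ */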
+ host_info->driver_version =
+ (DRV_MODULE_GEN_MAJOR) |
+ (DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
+ (DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
+ ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
+ host_info->num_cpus = num_online_cpus();
+
+ host_info->driver_supported_features =
+ ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
+ ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK |
+ ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK |
+ ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK |
+ ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK;
+
+ rc = ena_com_set_host_attributes(ena_dev);
+ if (rc) {
+ if (rc == -EOPNOTSUPP)
+ dev_warn(dev, "Cannot set host attributes\n");
+ else
+ dev_err(dev, "Cannot set host attributes\n");
+
+ goto err;
+ }
+
+ return;
+
+err:
+ ena_com_delete_host_info(ena_dev);
+}
+
+static void ena_config_debug_area(struct ena_adapter *adapter)
+{
+ u32 debug_area_size;
+ int rc, ss_count;
+
+ ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
+ if (ss_count <= 0) {
+ netif_err(adapter, drv, adapter->netdev,
+ "SS count is negative\n");
+ return;
+ }
+
+ /* allocate 32 bytes for each string and 64 bits for the value */
+ debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
+
+ rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
+ if (rc) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Cannot allocate debug area\n");
+ return;
+ }
+
+ rc = ena_com_set_host_attributes(adapter->ena_dev);
+ if (rc) {
+ if (rc == -EOPNOTSUPP)
+ netif_warn(adapter, drv, adapter->netdev,
+ "Cannot set host attributes\n");
+ else
+ netif_err(adapter, drv, adapter->netdev,
+ "Cannot set host attributes\n");
+ goto err;
+ }
+
+ return;
+err:
+ ena_com_delete_debug_area(adapter->ena_dev);
+}
+
+int ena_update_hw_stats(struct ena_adapter *adapter)
+{
+ int rc;
+
+ rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_stats);
+ if (rc) {
+ netdev_err(adapter->netdev, "Failed to get ENI stats\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+static void ena_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ struct ena_ring *rx_ring, *tx_ring;
+ unsigned int start;
+ u64 rx_drops;
+ u64 tx_drops;
+ int i;
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+ return;
+
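+ /* Read the per-queue counters under the u64_stats retry loop so
+ * 64-bit values are never observed torn on 32-bit machines.
+ */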
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ u64 bytes, packets;
+
+ tx_ring = &adapter->tx_ring[i];
+
+ do {
+ start = u64_stats_fetch_begin(&tx_ring->syncp);
+ packets = tx_ring->tx_stats.cnt;
+ bytes = tx_ring->tx_stats.bytes;
+ } while (u64_stats_fetch_retry(&tx_ring->syncp, start));
+
+ stats->tx_packets += packets;
+ stats->tx_bytes += bytes;
+
+ rx_ring = &adapter->rx_ring[i];
+
+ do {
+ start = u64_stats_fetch_begin(&rx_ring->syncp);
+ packets = rx_ring->rx_stats.cnt;
+ bytes = rx_ring->rx_stats.bytes;
+ } while (u64_stats_fetch_retry(&rx_ring->syncp, start));
+
+ stats->rx_packets += packets;
+ stats->rx_bytes += bytes;
+ }
+
+ do {
+ start = u64_stats_fetch_begin(&adapter->syncp);
+ rx_drops = adapter->dev_stats.rx_drops;
+ tx_drops = adapter->dev_stats.tx_drops;
+ } while (u64_stats_fetch_retry(&adapter->syncp, start));
+
+ stats->rx_dropped = rx_drops;
+ stats->tx_dropped = tx_drops;
+
+ stats->multicast = 0;
+ stats->collisions = 0;
+
+ stats->rx_length_errors = 0;
+ stats->rx_crc_errors = 0;
+ stats->rx_frame_errors = 0;
+ stats->rx_fifo_errors = 0;
+ stats->rx_missed_errors = 0;
+ stats->tx_window_errors = 0;
+
+ stats->rx_errors = 0;
+ stats->tx_errors = 0;
+}
+
+static const struct net_device_ops ena_netdev_ops = {
+ .ndo_open = ena_open,
+ .ndo_stop = ena_close,
+ .ndo_start_xmit = ena_start_xmit,
+ .ndo_select_queue = ena_select_queue,
+ .ndo_get_stats64 = ena_get_stats64,
+ .ndo_tx_timeout = ena_tx_timeout,
+ .ndo_change_mtu = ena_change_mtu,
+ .ndo_set_mac_address = NULL,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_bpf = ena_xdp,
+ .ndo_xdp_xmit = ena_xdp_xmit,
+};
+
+static void ena_calc_io_queue_size(struct ena_adapter *adapter,
+ struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+ struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq;
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
+ u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
+ u32 max_tx_queue_size;
+ u32 max_rx_queue_size;
+
+ /* If this function is called after driver load, the ring sizes have already
+ * been configured. Take them into account when recalculating the ring sizes.
+ */
+ if (adapter->tx_ring->ring_size)
+ tx_queue_size = adapter->tx_ring->ring_size;
+
+ if (adapter->rx_ring->ring_size)
+ rx_queue_size = adapter->rx_ring->ring_size;
+
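+ /* Devices supporting MAX_QUEUES_EXT report their limits via the
+ * extended queue feature; otherwise use the legacy max_queues fields.
+ */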
+ if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+ struct ena_admin_queue_ext_feature_fields *max_queue_ext =
+ &get_feat_ctx->max_queue_ext.max_queue_ext;
+ max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
+ max_queue_ext->max_rx_sq_depth);
+ max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
+
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ max_tx_queue_size = min_t(u32, max_tx_queue_size,
+ llq->max_llq_depth);
+ else
+ max_tx_queue_size = min_t(u32, max_tx_queue_size,
+ max_queue_ext->max_tx_sq_depth);
+
+ adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+ max_queue_ext->max_per_packet_tx_descs);
+ adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+ max_queue_ext->max_per_packet_rx_descs);
+ } else {
+ struct ena_admin_queue_feature_desc *max_queues =
+ &get_feat_ctx->max_queues;
+ max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
+ max_queues->max_sq_depth);
+ max_tx_queue_size = max_queues->max_cq_depth;
+
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ max_tx_queue_size = min_t(u32, max_tx_queue_size,
+ llq->max_llq_depth);
+ else
+ max_tx_queue_size = min_t(u32, max_tx_queue_size,
+ max_queues->max_sq_depth);
+
+ adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+ max_queues->max_packet_tx_descs);
+ adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+ max_queues->max_packet_rx_descs);
+ }
+
+ max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
+ max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
+
+ /* When forcing large headers, we multiply the entry size by 2, and therefore divide
+ * the queue size by 2, leaving the amount of memory used by the queues unchanged.
+ */
+ if (adapter->large_llq_header_enabled) {
+ if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
+ ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+ max_tx_queue_size /= 2;
+ dev_info(&adapter->pdev->dev,
+ "Forcing large headers and decreasing maximum TX queue size to %d\n",
+ max_tx_queue_size);
+ } else {
+ dev_err(&adapter->pdev->dev,
+ "Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
+
+ adapter->large_llq_header_enabled = false;
+ }
+ }
+
+ tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
+ max_tx_queue_size);
+ rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
+ max_rx_queue_size);
+
+ tx_queue_size = rounddown_pow_of_two(tx_queue_size);
+ rx_queue_size = rounddown_pow_of_two(rx_queue_size);
+
+ adapter->max_tx_ring_size = max_tx_queue_size;
+ adapter->max_rx_ring_size = max_rx_queue_size;
+ adapter->requested_tx_ring_size = tx_queue_size;
+ adapter->requested_rx_ring_size = rx_queue_size;
+}
+
+static int ena_device_validate_params(struct ena_adapter *adapter,
+ struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+ struct net_device *netdev = adapter->netdev;
+ int rc;
+
+ rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
+ adapter->mac_addr);
+ if (!rc) {
+ netif_err(adapter, drv, netdev,
+ "Error, mac address are different\n");
+ return -EINVAL;
+ }
+
+ if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
+ netif_err(adapter, drv, netdev,
+ "Error, device max mtu is smaller than netdev MTU\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void set_default_llq_configurations(struct ena_adapter *adapter,
+ struct ena_llq_configurations *llq_config,
+ struct ena_admin_feature_llq_desc *llq)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+
+ llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
+ llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
+ llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
+
+ adapter->large_llq_header_supported =
+ !!(ena_dev->supported_features & BIT(ENA_ADMIN_LLQ));
+ adapter->large_llq_header_supported &=
+ !!(llq->entry_size_ctrl_supported &
+ ENA_ADMIN_LIST_ENTRY_SIZE_256B);
+
+ if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
+ adapter->large_llq_header_enabled) {
+ llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_256B;
+ llq_config->llq_ring_entry_size_value = 256;
+ } else {
+ llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
+ llq_config->llq_ring_entry_size_value = 128;
+ }
+}
+
+static int ena_set_queues_placement_policy(struct pci_dev *pdev,
+ struct ena_com_dev *ena_dev,
+ struct ena_admin_feature_llq_desc *llq,
+ struct ena_llq_configurations *llq_default_configurations)
+{
+ int rc;
+ u32 llq_feature_mask;
+
+ llq_feature_mask = 1 << ENA_ADMIN_LLQ;
+ if (!(ena_dev->supported_features & llq_feature_mask)) {
+ dev_warn(&pdev->dev,
+ "LLQ is not supported Fallback to host mode policy.\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ return 0;
+ }
+
+ if (!ena_dev->mem_bar) {
+ netdev_err(ena_dev->net_device,
+ "LLQ is advertised as supported but device doesn't expose mem bar\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ return 0;
+ }
+
+ rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
+ if (unlikely(rc)) {
+ dev_err(&pdev->dev,
+ "Failed to configure the device mode. Fallback to host mode policy.\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ }
+
+ return 0;
+}
+
+static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
+ int bars)
+{
+ bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR));
+
+ if (!has_mem_bar)
+ return 0;
+
+ ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
+ pci_resource_start(pdev, ENA_MEM_BAR),
+ pci_resource_len(pdev, ENA_MEM_BAR));
+
+ if (!ena_dev->mem_bar)
+ return -EFAULT;
+
+ return 0;
+}
+
+static int ena_device_init(struct ena_adapter *adapter, struct pci_dev *pdev,
+ struct ena_com_dev_get_features_ctx *get_feat_ctx,
+ bool *wd_state)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ struct ena_llq_configurations llq_config;
+ struct device *dev = &pdev->dev;
+ bool readless_supported;
+ u32 aenq_groups;
+ int dma_width;
+ int rc;
+
+ rc = ena_com_mmio_reg_read_request_init(ena_dev);
+ if (rc) {
+ dev_err(dev, "Failed to init mmio read less\n");
+ return rc;
+ }
+
+ /* The PCIe configuration space revision id indicates whether mmio
+ * register read is disabled
+ */
+ readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
+ ena_com_set_mmio_read_mode(ena_dev, readless_supported);
+
+ rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
+ if (rc) {
+ dev_err(dev, "Can not reset device\n");
+ goto err_mmio_read_less;
+ }
+
+ rc = ena_com_validate_version(ena_dev);
+ if (rc) {
+ dev_err(dev, "Device version is too low\n");
+ goto err_mmio_read_less;
+ }
+
+ dma_width = ena_com_get_dma_width(ena_dev);
+ if (dma_width < 0) {
+ dev_err(dev, "Invalid dma width value %d", dma_width);
+ rc = dma_width;
+ goto err_mmio_read_less;
+ }
+
+ rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width));
+ if (rc) {
+ dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc);
+ goto err_mmio_read_less;
+ }
+
+ /* ENA admin level init */
+ rc = ena_com_admin_init(ena_dev, &aenq_handlers);
+ if (rc) {
+ dev_err(dev,
+ "Can not initialize ena admin queue with device\n");
+ goto err_mmio_read_less;
+ }
+
+ /* To enable the msix interrupts the driver needs to know the number
+ * of queues. So the driver uses polling mode to retrieve this
+ * information
+ */
+ ena_com_set_admin_polling_mode(ena_dev, true);
+
+ ena_config_host_info(ena_dev, pdev);
+
+ /* Get device attributes */
+ rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
+ if (rc) {
+ dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
+ goto err_admin_init;
+ }
+
+ /* Try to turn on all the available aenq groups */
+ aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
+ BIT(ENA_ADMIN_FATAL_ERROR) |
+ BIT(ENA_ADMIN_WARNING) |
+ BIT(ENA_ADMIN_NOTIFICATION) |
+ BIT(ENA_ADMIN_KEEP_ALIVE);
+
+ aenq_groups &= get_feat_ctx->aenq.supported_groups;
+
+ rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
+ if (rc) {
+ dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
+ goto err_admin_init;
+ }
+
+ *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
+
+ set_default_llq_configurations(adapter, &llq_config, &get_feat_ctx->llq);
+
+ rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
+ &llq_config);
+ if (rc) {
+ dev_err(dev, "ENA device init failed\n");
+ goto err_admin_init;
+ }
+
+ ena_calc_io_queue_size(adapter, get_feat_ctx);
+
+ return 0;
+
+err_admin_init:
+ ena_com_delete_host_info(ena_dev);
+ ena_com_admin_destroy(ena_dev);
+err_mmio_read_less:
+ ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+ return rc;
+}
+
+static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ struct device *dev = &adapter->pdev->dev;
+ int rc;
+
+ rc = ena_enable_msix(adapter);
+ if (rc) {
+ dev_err(dev, "Can not reserve msix vectors\n");
+ return rc;
+ }
+
+ ena_setup_mgmnt_intr(adapter);
+
+ rc = ena_request_mgmnt_irq(adapter);
+ if (rc) {
+ dev_err(dev, "Can not setup management interrupts\n");
+ goto err_disable_msix;
+ }
+
+ ena_com_set_admin_polling_mode(ena_dev, false);
+
+ ena_com_admin_aenq_enable(ena_dev);
+
+ return 0;
+
+err_disable_msix:
+ ena_disable_msix(adapter);
+
+ return rc;
+}
+
+static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ bool dev_up;
+
+ if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
+ return;
+
+ netif_carrier_off(netdev);
+
+ del_timer_sync(&adapter->timer_service);
+
+ dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+ adapter->dev_up_before_reset = dev_up;
+ if (!graceful)
+ ena_com_set_admin_running_state(ena_dev, false);
+
+ if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+ ena_down(adapter);
+
+ /* Stop the device from sending AENQ events (if the reset flag is set
+ * and the device is up, ena_down() has already reset the device).
+ */
+ if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
+ ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
+
+ ena_free_mgmnt_irq(adapter);
+
+ ena_disable_msix(adapter);
+
+ ena_com_abort_admin_commands(ena_dev);
+
+ ena_com_wait_for_abort_completion(ena_dev);
+
+ ena_com_admin_destroy(ena_dev);
+
+ ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+ /* return reset reason to default value */
+ adapter->reset_reason = ENA_REGS_RESET_NORMAL;
+
+ clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+}
+
+static int ena_restore_device(struct ena_adapter *adapter)
+{
+ struct ena_com_dev_get_features_ctx get_feat_ctx;
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ struct pci_dev *pdev = adapter->pdev;
+ struct ena_ring *txr;
+ int rc, count, i;
+ bool wd_state;
+
+ set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
+ rc = ena_device_init(adapter, adapter->pdev, &get_feat_ctx, &wd_state);
+ if (rc) {
+ dev_err(&pdev->dev, "Can not initialize device\n");
+ goto err;
+ }
+ adapter->wd_state = wd_state;
+
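+ /* The LLQ configuration may have changed across the reset;
+ * propagate the refreshed placement policy and max header size
+ * to every Tx ring.
+ */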
+ count = adapter->xdp_num_queues + adapter->num_io_queues;
+ for (i = 0 ; i < count; i++) {
+ txr = &adapter->tx_ring[i];
+ txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
+ txr->tx_max_header_size = ena_dev->tx_max_header_size;
+ }
+
+ rc = ena_device_validate_params(adapter, &get_feat_ctx);
+ if (rc) {
+ dev_err(&pdev->dev, "Validation of device parameters failed\n");
+ goto err_device_destroy;
+ }
+
+ rc = ena_enable_msix_and_set_admin_interrupts(adapter);
+ if (rc) {
+ dev_err(&pdev->dev, "Enable MSI-X failed\n");
+ goto err_device_destroy;
+ }
+ /* If the interface was up before the reset bring it up */
+ if (adapter->dev_up_before_reset) {
+ rc = ena_up(adapter);
+ if (rc) {
+ dev_err(&pdev->dev, "Failed to create I/O queues\n");
+ goto err_disable_msix;
+ }
+ }
+
+ set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
+ clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
+ if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
+ netif_carrier_on(adapter->netdev);
+
+ mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
+ adapter->last_keep_alive_jiffies = jiffies;
+
+ return rc;
+err_disable_msix:
+ ena_free_mgmnt_irq(adapter);
+ ena_disable_msix(adapter);
+err_device_destroy:
+ ena_com_abort_admin_commands(ena_dev);
+ ena_com_wait_for_abort_completion(ena_dev);
+ ena_com_admin_destroy(ena_dev);
+ ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
+ ena_com_mmio_reg_read_request_destroy(ena_dev);
+err:
+ clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+ clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
+ dev_err(&pdev->dev,
+ "Reset attempt failed. Can not reset the device\n");
+
+ return rc;
+}
+
+static void ena_fw_reset_device(struct work_struct *work)
+{
+ struct ena_adapter *adapter =
+ container_of(work, struct ena_adapter, reset_task);
+
+ rtnl_lock();
+
+ if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ ena_destroy_device(adapter, false);
+ ena_restore_device(adapter);
+
+ dev_err(&adapter->pdev->dev, "Device reset completed successfully\n");
+ }
+
+ rtnl_unlock();
+}
+
+static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
+ struct ena_ring *rx_ring)
+{
+ struct ena_napi *ena_napi = container_of(rx_ring->napi, struct ena_napi, napi);
+
+ if (likely(READ_ONCE(ena_napi->first_interrupt)))
+ return 0;
+
+ if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
+ return 0;
+
+ rx_ring->no_interrupt_event_cnt++;
+
+ if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
+ netif_err(adapter, rx_err, adapter->netdev,
+ "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
+ rx_ring->qid);
+
+ ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
+ struct ena_ring *tx_ring)
+{
+ struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi);
+ unsigned int time_since_last_napi;
+ unsigned int missing_tx_comp_to;
+ bool is_tx_comp_time_expired;
+ struct ena_tx_buffer *tx_buf;
+ unsigned long last_jiffies;
+ u32 missed_tx = 0;
+ int i, rc = 0;
+
+ for (i = 0; i < tx_ring->ring_size; i++) {
+ tx_buf = &tx_ring->tx_buffer_info[i];
+ last_jiffies = tx_buf->last_jiffies;
+
+ if (last_jiffies == 0)
+ /* no pending Tx at this location */
+ continue;
+
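+ /* Allow twice the configured completion timeout as a graceful
+ * period before suspecting a missed interrupt.
+ */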
+ is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
+ 2 * adapter->missing_tx_completion_to);
+
+ if (unlikely(!READ_ONCE(ena_napi->first_interrupt) && is_tx_comp_time_expired)) {
+ /* If the interrupt is still not received after the
+ * graceful period, we schedule a reset
+ */
+ netif_err(adapter, tx_err, adapter->netdev,
+ "Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
+ tx_ring->qid);
+ ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
+ return -EIO;
+ }
+
+ is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
+ adapter->missing_tx_completion_to);
+
+ if (unlikely(is_tx_comp_time_expired)) {
+ if (!tx_buf->print_once) {
+ time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
+ missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to);
+ netif_notice(adapter, tx_err, adapter->netdev,
+ "Found a Tx that wasn't completed on time, qid %d, index %d. %u usecs have passed since last napi execution. Missing Tx timeout value %u msecs\n",
+ tx_ring->qid, i, time_since_last_napi, missing_tx_comp_to);
+ }
+
+ tx_buf->print_once = 1;
+ missed_tx++;
+ }
+ }
+
+ if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
+ netif_err(adapter, tx_err, adapter->netdev,
+ "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+ missed_tx,
+ adapter->missing_tx_completion_threshold);
+ ena_reset_device(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
+ rc = -EIO;
+ }
+
+ ena_increase_stat(&tx_ring->tx_stats.missed_tx, missed_tx,
+ &tx_ring->syncp);
+
+ return rc;
+}
+
+static void check_for_missing_completions(struct ena_adapter *adapter)
+{
+ struct ena_ring *tx_ring;
+ struct ena_ring *rx_ring;
+ int i, budget, rc;
+ int io_queue_count;
+
+ io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
+ /* Make sure the driver isn't turning the device off in another process */
+ smp_rmb();
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+ return;
+
+ if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+ return;
+
+ if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
+ return;
+
+ budget = ENA_MONITORED_TX_QUEUES;
+
+ for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
+ tx_ring = &adapter->tx_ring[i];
+ rx_ring = &adapter->rx_ring[i];
+
+ rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
+ if (unlikely(rc))
+ return;
+
+ rc = !ENA_IS_XDP_INDEX(adapter, i) ?
+ check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
+ if (unlikely(rc))
+ return;
+
+ budget--;
+ if (!budget)
+ break;
+ }
+
+ adapter->last_monitored_tx_qid = i % io_queue_count;
+}
+
+/* trigger napi schedule after 2 consecutive detections */
+#define EMPTY_RX_REFILL 2
+/* For the rare case where the device runs out of Rx descriptors and the
+ * napi handler failed to refill new Rx descriptors (due to a lack of memory
+ * for example).
+ * This case will lead to a deadlock:
+ * The device won't send interrupts since all the new Rx packets will be dropped
+ * The napi handler won't allocate new Rx descriptors so the device won't
+ * be able to receive new packets.
+ *
+ * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
+ * It is recommended to have at least 512MB, with a minimum of 128MB for
+ * constrained environments.
+ *
+ * When such a situation is detected - Reschedule napi
+ */
+static void check_for_empty_rx_ring(struct ena_adapter *adapter)
+{
+ struct ena_ring *rx_ring;
+ int i, refill_required;
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+ return;
+
+ if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+ return;
+
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ rx_ring = &adapter->rx_ring[i];
+
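+ /* A completely empty ring reports ring_size - 1 free entries,
+ * as one descriptor slot is always kept unused.
+ */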
+ refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
+ if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
+ rx_ring->empty_rx_queue++;
+
+ if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
+ ena_increase_stat(&rx_ring->rx_stats.empty_rx_ring, 1,
+ &rx_ring->syncp);
+
+ netif_err(adapter, drv, adapter->netdev,
+ "Trigger refill for ring %d\n", i);
+
+ napi_schedule(rx_ring->napi);
+ rx_ring->empty_rx_queue = 0;
+ }
+ } else {
+ rx_ring->empty_rx_queue = 0;
+ }
+ }
+}
+
+/* Check for keep alive expiration */
+static void check_for_missing_keep_alive(struct ena_adapter *adapter)
+{
+ unsigned long keep_alive_expired;
+
+ if (!adapter->wd_state)
+ return;
+
+ if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+ return;
+
+ keep_alive_expired = adapter->last_keep_alive_jiffies +
+ adapter->keep_alive_timeout;
+ if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Keep alive watchdog timeout.\n");
+ ena_increase_stat(&adapter->dev_stats.wd_expired, 1,
+ &adapter->syncp);
+ ena_reset_device(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
+ }
+}
+
+static void check_for_admin_com_state(struct ena_adapter *adapter)
+{
+ if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
+ netif_err(adapter, drv, adapter->netdev,
+ "ENA admin queue is not in running state!\n");
+ ena_increase_stat(&adapter->dev_stats.admin_q_pause, 1,
+ &adapter->syncp);
+ ena_reset_device(adapter, ENA_REGS_RESET_ADMIN_TO);
+ }
+}
+
+static void ena_update_hints(struct ena_adapter *adapter,
+ struct ena_admin_ena_hw_hints *hints)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ if (hints->admin_completion_tx_timeout)
+ adapter->ena_dev->admin_queue.completion_timeout =
+ hints->admin_completion_tx_timeout * 1000;
+
+ if (hints->mmio_read_timeout)
+ /* convert to usec */
+ adapter->ena_dev->mmio_read.reg_read_to =
+ hints->mmio_read_timeout * 1000;
+
+ if (hints->missed_tx_completion_count_threshold_to_reset)
+ adapter->missing_tx_completion_threshold =
+ hints->missed_tx_completion_count_threshold_to_reset;
+
+ if (hints->missing_tx_completion_timeout) {
+ if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+ adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
+ else
+ adapter->missing_tx_completion_to =
+ msecs_to_jiffies(hints->missing_tx_completion_timeout);
+ }
+
+ if (hints->netdev_wd_timeout)
+ netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
+
+ if (hints->driver_watchdog_timeout) {
+ if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+ adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
+ else
+ adapter->keep_alive_timeout =
+ msecs_to_jiffies(hints->driver_watchdog_timeout);
+ }
+}
+
+static void ena_update_host_info(struct ena_admin_host_info *host_info,
+ struct net_device *netdev)
+{
+ host_info->supported_network_features[0] =
+ netdev->features & GENMASK_ULL(31, 0);
+ host_info->supported_network_features[1] =
+ (netdev->features & GENMASK_ULL(63, 32)) >> 32;
+}
+
+static void ena_timer_service(struct timer_list *t)
+{
+ struct ena_adapter *adapter = from_timer(adapter, t, timer_service);
+ u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
+ struct ena_admin_host_info *host_info =
+ adapter->ena_dev->host_attr.host_info;
+
+ check_for_missing_keep_alive(adapter);
+
+ check_for_admin_com_state(adapter);
+
+ check_for_missing_completions(adapter);
+
+ check_for_empty_rx_ring(adapter);
+
+ if (debug_area)
+ ena_dump_stats_to_buf(adapter, debug_area);
+
+ if (host_info)
+ ena_update_host_info(host_info, adapter->netdev);
+
+ if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Trigger reset is on\n");
+ ena_dump_stats_to_dmesg(adapter);
+ queue_work(ena_wq, &adapter->reset_task);
+ return;
+ }
+
+ /* Reset the timer */
+ mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
+}
+
+static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
+ struct ena_com_dev *ena_dev,
+ struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+ u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
+
+ if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+ struct ena_admin_queue_ext_feature_fields *max_queue_ext =
+ &get_feat_ctx->max_queue_ext.max_queue_ext;
+ io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
+ max_queue_ext->max_rx_cq_num);
+
+ io_tx_sq_num = max_queue_ext->max_tx_sq_num;
+ io_tx_cq_num = max_queue_ext->max_tx_cq_num;
+ } else {
+ struct ena_admin_queue_feature_desc *max_queues =
+ &get_feat_ctx->max_queues;
+ io_tx_sq_num = max_queues->max_sq_num;
+ io_tx_cq_num = max_queues->max_cq_num;
+ io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
+ }
+
+ /* In case of LLQ use the llq fields for the tx SQ/CQ */
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
+
+ max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
+ max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
+ max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
+ max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
+ /* 1 IRQ for mgmnt and 1 IRQ for each IO queue (shared by Tx and Rx) */
+ max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
+
+ return max_num_io_queues;
+}
+
+static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
+ struct net_device *netdev)
+{
+ netdev_features_t dev_features = 0;
+
+ /* Set offload features */
+ if (feat->offload.tx &
+ ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
+ dev_features |= NETIF_F_IP_CSUM;
+
+ if (feat->offload.tx &
+ ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
+ dev_features |= NETIF_F_IPV6_CSUM;
+
+ if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
+ dev_features |= NETIF_F_TSO;
+
+ if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
+ dev_features |= NETIF_F_TSO6;
+
+ if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
+ dev_features |= NETIF_F_TSO_ECN;
+
+ if (feat->offload.rx_supported &
+ ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
+ dev_features |= NETIF_F_RXCSUM;
+
+ if (feat->offload.rx_supported &
+ ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
+ dev_features |= NETIF_F_RXCSUM;
+
+ netdev->features =
+ dev_features |
+ NETIF_F_SG |
+ NETIF_F_RXHASH |
+ NETIF_F_HIGHDMA;
+
+ netdev->hw_features |= netdev->features;
+ netdev->vlan_features |= netdev->features;
+}
+
+static void ena_set_conf_feat_params(struct ena_adapter *adapter,
+ struct ena_com_dev_get_features_ctx *feat)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ /* Copy mac address */
+ if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
+ eth_hw_addr_random(netdev);
+ ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
+ } else {
+ ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
+ eth_hw_addr_set(netdev, adapter->mac_addr);
+ }
+
+ /* Set offload features */
+ ena_set_dev_offloads(feat, netdev);
+
+ adapter->max_mtu = feat->dev_attr.max_mtu;
+ netdev->max_mtu = adapter->max_mtu;
+ netdev->min_mtu = ENA_MIN_MTU;
+}
+
+static int ena_rss_init_default(struct ena_adapter *adapter)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ struct device *dev = &adapter->pdev->dev;
+ int rc, i;
+ u32 val;
+
+ rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
+ if (unlikely(rc)) {
+ dev_err(dev, "Cannot init indirect table\n");
+ goto err_rss_init;
+ }
+
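+ /* Spread the RX queues over the indirection table using the
+ * kernel's default round-robin distribution.
+ */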
+ for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
+ val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
+ rc = ena_com_indirect_table_fill_entry(ena_dev, i,
+ ENA_IO_RXQ_IDX(val));
+ if (unlikely(rc)) {
+ dev_err(dev, "Cannot fill indirect table\n");
+ goto err_fill_indir;
+ }
+ }
+
+ rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
+ ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
+ if (unlikely(rc && (rc != -EOPNOTSUPP))) {
+ dev_err(dev, "Cannot fill hash function\n");
+ goto err_fill_indir;
+ }
+
+ rc = ena_com_set_default_hash_ctrl(ena_dev);
+ if (unlikely(rc && (rc != -EOPNOTSUPP))) {
+ dev_err(dev, "Cannot fill hash control\n");
+ goto err_fill_indir;
+ }
+
+ return 0;
+
+err_fill_indir:
+ ena_com_rss_destroy(ena_dev);
+err_rss_init:
+
+ return rc;
+}
+
+static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
+{
+ int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
+
+ pci_release_selected_regions(pdev, release_bars);
+}
+
+/* ena_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ena_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ena_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ */
+static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct ena_com_dev_get_features_ctx get_feat_ctx;
+ struct ena_com_dev *ena_dev = NULL;
+ struct ena_adapter *adapter;
+ struct net_device *netdev;
+ static int adapters_found;
+ u32 max_num_io_queues;
+ bool wd_state;
+ int bars, rc;
+
+ dev_dbg(&pdev->dev, "%s\n", __func__);
+
+ rc = pci_enable_device_mem(pdev);
+ if (rc) {
+ dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
+ return rc;
+ }
+
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS));
+ if (rc) {
+ dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc);
+ goto err_disable_device;
+ }
+
+ pci_set_master(pdev);
+
+ ena_dev = vzalloc(sizeof(*ena_dev));
+ if (!ena_dev) {
+ rc = -ENOMEM;
+ goto err_disable_device;
+ }
+
+ bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
+ rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ if (rc) {
+ dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
+ rc);
+ goto err_free_ena_dev;
+ }
+
+ ena_dev->reg_bar = devm_ioremap(&pdev->dev,
+ pci_resource_start(pdev, ENA_REG_BAR),
+ pci_resource_len(pdev, ENA_REG_BAR));
+ if (!ena_dev->reg_bar) {
+ dev_err(&pdev->dev, "Failed to remap regs bar\n");
+ rc = -EFAULT;
+ goto err_free_region;
+ }
+
+ ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
+
+ ena_dev->dmadev = &pdev->dev;
+
+ netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), ENA_MAX_RINGS);
+ if (!netdev) {
+ dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
+ rc = -ENOMEM;
+ goto err_free_region;
+ }
+
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+ adapter = netdev_priv(netdev);
+ adapter->ena_dev = ena_dev;
+ adapter->netdev = netdev;
+ adapter->pdev = pdev;
+ adapter->msg_enable = DEFAULT_MSG_ENABLE;
+
+ ena_dev->net_device = netdev;
+
+ pci_set_drvdata(pdev, adapter);
+
+ rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
+ if (rc) {
+ dev_err(&pdev->dev, "ENA LLQ bar mapping failed\n");
+ goto err_netdev_destroy;
+ }
+
+ rc = ena_device_init(adapter, pdev, &get_feat_ctx, &wd_state);
+ if (rc) {
+ dev_err(&pdev->dev, "ENA device init failed\n");
+ if (rc == -ETIME)
+ rc = -EPROBE_DEFER;
+ goto err_netdev_destroy;
+ }
+
+ /* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
+ * Updated during device initialization with the real granularity
+ */
+ ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
+ ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
+ ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
+ max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
+ if (unlikely(!max_num_io_queues)) {
+ rc = -EFAULT;
+ goto err_device_destroy;
+ }
+
+ ena_set_conf_feat_params(adapter, &get_feat_ctx);
+
+ adapter->reset_reason = ENA_REGS_RESET_NORMAL;
+
+ adapter->num_io_queues = max_num_io_queues;
+ adapter->max_num_io_queues = max_num_io_queues;
+ adapter->last_monitored_tx_qid = 0;
+
+ adapter->xdp_first_ring = 0;
+ adapter->xdp_num_queues = 0;
+
+ adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ adapter->disable_meta_caching =
+ !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
+ BIT(ENA_ADMIN_DISABLE_META_CACHING));
+
+ adapter->wd_state = wd_state;
+
+ snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
+
+ rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
+ if (rc) {
+ dev_err(&pdev->dev,
+ "Failed to query interrupt moderation feature\n");
+ goto err_device_destroy;
+ }
+
+ ena_init_io_rings(adapter,
+ 0,
+ adapter->xdp_num_queues +
+ adapter->num_io_queues);
+
+ netdev->netdev_ops = &ena_netdev_ops;
+ netdev->watchdog_timeo = TX_TIMEOUT;
+ ena_set_ethtool_ops(netdev);
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
+ u64_stats_init(&adapter->syncp);
+
+ rc = ena_enable_msix_and_set_admin_interrupts(adapter);
+ if (rc) {
+ dev_err(&pdev->dev,
+ "Failed to enable and set the admin interrupts\n");
+ goto err_worker_destroy;
+ }
+ rc = ena_rss_init_default(adapter);
+ if (rc && (rc != -EOPNOTSUPP)) {
+ dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
+ goto err_free_msix;
+ }
+
+ ena_config_debug_area(adapter);
+
+ if (ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
+ netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
+ NETDEV_XDP_ACT_REDIRECT;
+
+ memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
+
+ netif_carrier_off(netdev);
+
+ rc = register_netdev(netdev);
+ if (rc) {
+ dev_err(&pdev->dev, "Cannot register net device\n");
+ goto err_rss;
+ }
+
+ INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
+
+ adapter->last_keep_alive_jiffies = jiffies;
+ adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
+ adapter->missing_tx_completion_to = TX_TIMEOUT;
+ adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
+
+ ena_update_hints(adapter, &get_feat_ctx.hw_hints);
+
+ timer_setup(&adapter->timer_service, ena_timer_service, 0);
+ mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
+
+ dev_info(&pdev->dev,
+ "%s found at mem %lx, mac addr %pM\n",
+ DEVICE_NAME, (long)pci_resource_start(pdev, 0),
+ netdev->dev_addr);
+
+ set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
+ adapters_found++;
+
+ return 0;
+
+err_rss:
+ ena_com_delete_debug_area(ena_dev);
+ ena_com_rss_destroy(ena_dev);
+err_free_msix:
+ ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
+ /* stop submitting admin commands on a device that was reset */
+ ena_com_set_admin_running_state(ena_dev, false);
+ ena_free_mgmnt_irq(adapter);
+ ena_disable_msix(adapter);
+err_worker_destroy:
+ del_timer(&adapter->timer_service);
+err_device_destroy:
+ ena_com_delete_host_info(ena_dev);
+ ena_com_admin_destroy(ena_dev);
+err_netdev_destroy:
+ free_netdev(netdev);
+err_free_region:
+ ena_release_bars(ena_dev, pdev);
+err_free_ena_dev:
+ vfree(ena_dev);
+err_disable_device:
+ pci_disable_device(pdev);
+ return rc;
+}
+
+/*****************************************************************************/
+
+/* __ena_shutoff - Helper used in both PCI remove/shutdown routines
+ * @pdev: PCI device information struct
+ * @shutdown: Is it a shutdown operation? If false, it is a removal.
+ *
+ * __ena_shutoff is a helper routine that does the real work on the shutdown
+ * and removal paths; the difference between the two is whether the netdevice
+ * is detached or unregistered.
+ */
+static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
+{
+ struct ena_adapter *adapter = pci_get_drvdata(pdev);
+ struct ena_com_dev *ena_dev;
+ struct net_device *netdev;
+
+ ena_dev = adapter->ena_dev;
+ netdev = adapter->netdev;
+
+#ifdef CONFIG_RFS_ACCEL
+ if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
+ free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+ netdev->rx_cpu_rmap = NULL;
+ }
+#endif /* CONFIG_RFS_ACCEL */
+
+ /* Make sure timer and reset routine won't be called after
+ * freeing device resources.
+ */
+ del_timer_sync(&adapter->timer_service);
+ cancel_work_sync(&adapter->reset_task);
+
+ rtnl_lock(); /* lock released inside the below if-else block */
+ adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
+ ena_destroy_device(adapter, true);
+
+ if (shutdown) {
+ netif_device_detach(netdev);
+ dev_close(netdev);
+ rtnl_unlock();
+ } else {
+ rtnl_unlock();
+ unregister_netdev(netdev);
+ free_netdev(netdev);
+ }
+
+ ena_com_rss_destroy(ena_dev);
+
+ ena_com_delete_debug_area(ena_dev);
+
+ ena_com_delete_host_info(ena_dev);
+
+ ena_release_bars(ena_dev, pdev);
+
+ pci_disable_device(pdev);
+
+ vfree(ena_dev);
+}
+
+/* ena_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ena_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ */
+
+static void ena_remove(struct pci_dev *pdev)
+{
+ __ena_shutoff(pdev, false);
+}
+
+/* ena_shutdown - Device Shutdown Routine
+ * @pdev: PCI device information struct
+ *
+ * ena_shutdown is called by the PCI subsystem to alert the driver that
+ * a shutdown/reboot (or kexec) is happening and the device must be disabled.
+ */
+
+static void ena_shutdown(struct pci_dev *pdev)
+{
+ __ena_shutoff(pdev, true);
+}
+
+/* ena_suspend - PM suspend callback
+ * @dev_d: Device information struct
+ */
+static int __maybe_unused ena_suspend(struct device *dev_d)
+{
+ struct pci_dev *pdev = to_pci_dev(dev_d);
+ struct ena_adapter *adapter = pci_get_drvdata(pdev);
+
+ ena_increase_stat(&adapter->dev_stats.suspend, 1, &adapter->syncp);
+
+ rtnl_lock();
+ if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ dev_err(&pdev->dev,
+ "Ignoring device reset request as the device is being suspended\n");
+ clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ }
+ ena_destroy_device(adapter, true);
+ rtnl_unlock();
+ return 0;
+}
+
+/* ena_resume - PM resume callback
+ * @dev_d: Device information struct
+ */
+static int __maybe_unused ena_resume(struct device *dev_d)
+{
+ struct ena_adapter *adapter = dev_get_drvdata(dev_d);
+ int rc;
+
+ ena_increase_stat(&adapter->dev_stats.resume, 1, &adapter->syncp);
+
+ rtnl_lock();
+ rc = ena_restore_device(adapter);
+ rtnl_unlock();
+ return rc;
+}
+
+static SIMPLE_DEV_PM_OPS(ena_pm_ops, ena_suspend, ena_resume);
+
+static struct pci_driver ena_pci_driver = {
+ .name = DRV_MODULE_NAME,
+ .id_table = ena_pci_tbl,
+ .probe = ena_probe,
+ .remove = ena_remove,
+ .shutdown = ena_shutdown,
+ .driver.pm = &ena_pm_ops,
+ .sriov_configure = pci_sriov_configure_simple,
+};
+
+static int __init ena_init(void)
+{
+ int ret;
+
+ ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
+ if (!ena_wq) {
+ pr_err("Failed to create workqueue\n");
+ return -ENOMEM;
+ }
+
+ ret = pci_register_driver(&ena_pci_driver);
+ if (ret)
+ destroy_workqueue(ena_wq);
+
+ return ret;
+}
+
+static void __exit ena_cleanup(void)
+{
+ pci_unregister_driver(&ena_pci_driver);
+
+ if (ena_wq) {
+ destroy_workqueue(ena_wq);
+ ena_wq = NULL;
+ }
+}
+
+/******************************************************************************
+ ******************************** AENQ Handlers *******************************
+ *****************************************************************************/
+/* ena_update_on_link_change:
+ * Notify the network interface about the change in link status
+ */
+static void ena_update_on_link_change(void *adapter_data,
+ struct ena_admin_aenq_entry *aenq_e)
+{
+ struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+ struct ena_admin_aenq_link_change_desc *aenq_desc =
+ (struct ena_admin_aenq_link_change_desc *)aenq_e;
+ int status = aenq_desc->flags &
+ ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
+
+ if (status) {
+ netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
+ set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
+ if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
+ netif_carrier_on(adapter->netdev);
+ } else {
+ clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
+ netif_carrier_off(adapter->netdev);
+ }
+}
+
+static void ena_keep_alive_wd(void *adapter_data,
+ struct ena_admin_aenq_entry *aenq_e)
+{
+ struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+ struct ena_admin_aenq_keep_alive_desc *desc;
+ u64 rx_drops;
+ u64 tx_drops;
+
+ desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
+ adapter->last_keep_alive_jiffies = jiffies;
+
+ rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
+ tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
+
+ u64_stats_update_begin(&adapter->syncp);
+ /* These stats are accumulated by the device, so the counters indicate
+ * all drops since last reset.
+ */
+ adapter->dev_stats.rx_drops = rx_drops;
+ adapter->dev_stats.tx_drops = tx_drops;
+ u64_stats_update_end(&adapter->syncp);
+}
+
+static void ena_notification(void *adapter_data,
+ struct ena_admin_aenq_entry *aenq_e)
+{
+ struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+ struct ena_admin_ena_hw_hints *hints;
+
+ WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
+ "Invalid group(%x) expected %x\n",
+ aenq_e->aenq_common_desc.group,
+ ENA_ADMIN_NOTIFICATION);
+
+ switch (aenq_e->aenq_common_desc.syndrome) {
+ case ENA_ADMIN_UPDATE_HINTS:
+ hints = (struct ena_admin_ena_hw_hints *)
+ (&aenq_e->inline_data_w4);
+ ena_update_hints(adapter, hints);
+ break;
+ default:
+ netif_err(adapter, drv, adapter->netdev,
+ "Invalid aenq notification link state %d\n",
+ aenq_e->aenq_common_desc.syndrome);
+ }
+}
+
+/* This handler is called for an unknown event group or an unimplemented handler */
+static void unimplemented_aenq_handler(void *data,
+ struct ena_admin_aenq_entry *aenq_e)
+{
+ struct ena_adapter *adapter = (struct ena_adapter *)data;
+
+ netif_err(adapter, drv, adapter->netdev,
+ "Unknown event was received or event with unimplemented handler\n");
+}
+
+static struct ena_aenq_handlers aenq_handlers = {
+ .handlers = {
+ [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
+ [ENA_ADMIN_NOTIFICATION] = ena_notification,
+ [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
+ },
+ .unimplemented_handler = unimplemented_aenq_handler
+};
+
+module_init(ena_init);
+module_exit(ena_cleanup);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
new file mode 100644
index 0000000000..33c923e126
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -0,0 +1,467 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef ENA_H
+#define ENA_H
+
+#include <linux/bitops.h>
+#include <linux/dim.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/inetdevice.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/xdp.h>
+#include <uapi/linux/bpf.h>
+
+#include "ena_com.h"
+#include "ena_eth_com.h"
+
+#define DRV_MODULE_GEN_MAJOR 2
+#define DRV_MODULE_GEN_MINOR 1
+#define DRV_MODULE_GEN_SUBMINOR 0
+
+#define DRV_MODULE_NAME "ena"
+
+#define DEVICE_NAME "Elastic Network Adapter (ENA)"
+
+/* 1 for AENQ + ADMIN */
+#define ENA_ADMIN_MSIX_VEC 1
+#define ENA_MAX_MSIX_VEC(io_queues) (ENA_ADMIN_MSIX_VEC + (io_queues))
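+/* Illustrative arithmetic (not part of the driver): a device using 8 IO
+ * queues would request ENA_MAX_MSIX_VEC(8) == 9 MSI-X vectors: vector 0
+ * for admin/AENQ and vectors 1..8 for the IO queue pairs.
+ */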
+
+/* The ENA buffer length field is 16 bits long, so when PAGE_SIZE == 64kB the
+ * driver passes 0.
+ * Since the max packet size the ENA handles is ~9kB, limit the buffer length
+ * to 16kB.
+ */
+#if PAGE_SIZE > SZ_16K
+#define ENA_PAGE_SIZE (_AC(SZ_16K, UL))
+#else
+#define ENA_PAGE_SIZE PAGE_SIZE
+#endif
+
+#define ENA_MIN_MSIX_VEC 2
+
+#define ENA_REG_BAR 0
+#define ENA_MEM_BAR 2
+#define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR))
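+/* BAR 0 exposes the device registers and BAR 2 the LLQ memory window; the
+ * mask is used in ena_probe() to select both when requesting PCI regions.
+ */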
+
+#define ENA_DEFAULT_RING_SIZE (1024)
+#define ENA_MIN_RING_SIZE (256)
+
+#define ENA_MIN_RX_BUF_SIZE (2048)
+
+#define ENA_MIN_NUM_IO_QUEUES (1)
+
+#define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2)
+#define ENA_DEFAULT_RX_COPYBREAK (256 - NET_IP_ALIGN)
+
+#define ENA_MIN_MTU 128
+
+#define ENA_NAME_MAX_LEN 20
+#define ENA_IRQNAME_SIZE 40
+
+#define ENA_PKT_MAX_BUFS 19
+
+#define ENA_RX_RSS_TABLE_LOG_SIZE 7
+#define ENA_RX_RSS_TABLE_SIZE (1 << ENA_RX_RSS_TABLE_LOG_SIZE)
+
+/* The number of tx packet completions that will be handled each NAPI poll
+ * cycle is ring_size / ENA_TX_POLL_BUDGET_DIVIDER.
+ */
+#define ENA_TX_POLL_BUDGET_DIVIDER 4
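+/* Worked example: with the default ring size of 1024 above, each NAPI poll
+ * handles at most 1024 / 4 = 256 tx completions.
+ */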
+
+/* Refill Rx queue when number of required descriptors is above
+ * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET
+ */
+#define ENA_RX_REFILL_THRESH_DIVIDER 8
+#define ENA_RX_REFILL_THRESH_PACKET 256
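+/* Worked example (a sketch, assuming the smaller of the two bounds is used):
+ * a 1024-entry rx ring gives 1024 / 8 = 128, which is below
+ * ENA_RX_REFILL_THRESH_PACKET, so the ring is refilled once more than 128
+ * descriptors are required.
+ */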
+
+/* Number of queues to check for missing queues per timer service */
+#define ENA_MONITORED_TX_QUEUES 4
+/* Max timeout packets before device reset */
+#define MAX_NUM_OF_TIMEOUTED_PACKETS 128
+
+#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
+
+#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
+#define ENA_RX_RING_IDX_ADD(idx, n, ring_size) \
+ (((idx) + (n)) & ((ring_size) - 1))
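+/* These wrap-around macros rely on ring_size being a power of two, e.g.
+ * ENA_TX_RING_IDX_NEXT(1023, 1024) == (1024 & 1023) == 0.
+ */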
+
+#define ENA_IO_TXQ_IDX(q) (2 * (q))
+#define ENA_IO_RXQ_IDX(q) (2 * (q) + 1)
+#define ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q) ((q) / 2)
+#define ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q) (((q) - 1) / 2)
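+/* TX and RX queues are interleaved in a single index space, e.g. qid 3 maps
+ * to TXQ index 6 and RXQ index 7, and both fold back to combined index 3.
+ */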
+
+#define ENA_MGMNT_IRQ_IDX 0
+#define ENA_IO_IRQ_FIRST_IDX 1
+#define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q))
+
+#define ENA_ADMIN_POLL_DELAY_US 100
+
+/* The ENA device should send a keep-alive message every second.
+ * We wait for 6 sec just to be on the safe side.
+ */
+#define ENA_DEVICE_KALIVE_TIMEOUT (6 * HZ)
+#define ENA_MAX_NO_INTERRUPT_ITERATIONS 3
+
+#define ENA_MMIO_DISABLE_REG_READ BIT(0)
+
+/* The max MTU size is configured to be the ethernet frame size without
+ * the overhead of the ethernet header (which may include a VLAN header) and
+ * the frame check sequence (FCS).
+ * The buffer size we share with the device is defined to be ENA_PAGE_SIZE.
+ */
+
+#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
+ VLAN_HLEN - XDP_PACKET_HEADROOM - \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
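+/* Illustrative arithmetic, assuming a 4kB ENA_PAGE_SIZE and typical x86-64
+ * values (ETH_HLEN 14, ETH_FCS_LEN 4, VLAN_HLEN 4, XDP_PACKET_HEADROOM 256,
+ * skb_shared_info ~320 bytes after alignment):
+ * 4096 - 14 - 4 - 4 - 256 - 320 leaves roughly 3.4kB of MTU for XDP.
+ */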
+
+#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \
+ ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues))
+
+struct ena_irq {
+ irq_handler_t handler;
+ void *data;
+ int cpu;
+ u32 vector;
+ cpumask_t affinity_hint_mask;
+ char name[ENA_IRQNAME_SIZE];
+};
+
+struct ena_napi {
+ u8 first_interrupt ____cacheline_aligned;
+ u8 interrupts_masked;
+ struct napi_struct napi;
+ struct ena_ring *tx_ring;
+ struct ena_ring *rx_ring;
+ struct ena_ring *xdp_ring;
+ u32 qid;
+ struct dim dim;
+};
+
+struct ena_tx_buffer {
+ struct sk_buff *skb;
+ /* num of ena desc for this specific skb
+ * (includes data desc and metadata desc)
+ */
+ u32 tx_descs;
+ /* num of buffers used by this skb */
+ u32 num_of_bufs;
+
+ /* XDP buffer structure which is used for sending packets in
+ * the xdp queues
+ */
+ struct xdp_frame *xdpf;
+
+ /* Indicates whether bufs[0] maps the linear data of the skb. */
+ u8 map_linear_data;
+
+ /* Used when detecting missing tx packets to limit the number of prints */
+ u32 print_once;
+ /* Save the last jiffies to detect missing tx packets.
+ *
+ * Set to a non-zero value on ena_start_xmit and set to zero by
+ * napi and the timer service routine.
+ *
+ * While this value is not protected by a lock,
+ * a given packet is not expected to be handled by ena_start_xmit
+ * and by napi/timer_service at the same time.
+ */
+ unsigned long last_jiffies;
+ struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
+} ____cacheline_aligned;
+
+struct ena_rx_buffer {
+ struct sk_buff *skb;
+ struct page *page;
+ dma_addr_t dma_addr;
+ u32 page_offset;
+ u32 buf_offset;
+ struct ena_com_buf ena_buf;
+} ____cacheline_aligned;
+
+struct ena_stats_tx {
+ u64 cnt;
+ u64 bytes;
+ u64 queue_stop;
+ u64 prepare_ctx_err;
+ u64 queue_wakeup;
+ u64 dma_mapping_err;
+ u64 linearize;
+ u64 linearize_failed;
+ u64 napi_comp;
+ u64 tx_poll;
+ u64 doorbells;
+ u64 bad_req_id;
+ u64 llq_buffer_copy;
+ u64 missed_tx;
+ u64 unmask_interrupt;
+ u64 last_napi_jiffies;
+};
+
+struct ena_stats_rx {
+ u64 cnt;
+ u64 bytes;
+ u64 rx_copybreak_pkt;
+ u64 csum_good;
+ u64 refil_partial;
+ u64 csum_bad;
+ u64 page_alloc_fail;
+ u64 skb_alloc_fail;
+ u64 dma_mapping_err;
+ u64 bad_desc_num;
+ u64 bad_req_id;
+ u64 empty_rx_ring;
+ u64 csum_unchecked;
+ u64 xdp_aborted;
+ u64 xdp_drop;
+ u64 xdp_pass;
+ u64 xdp_tx;
+ u64 xdp_invalid;
+ u64 xdp_redirect;
+};
+
+struct ena_ring {
+ /* Holds the free request ids; needed to handle TX/RX
+ * out-of-order completions
+ */
+ u16 *free_ids;
+
+ union {
+ struct ena_tx_buffer *tx_buffer_info;
+ struct ena_rx_buffer *rx_buffer_info;
+ };
+
+ /* cache ptr to avoid using the adapter */
+ struct device *dev;
+ struct pci_dev *pdev;
+ struct napi_struct *napi;
+ struct net_device *netdev;
+ struct ena_com_dev *ena_dev;
+ struct ena_adapter *adapter;
+ struct ena_com_io_cq *ena_com_io_cq;
+ struct ena_com_io_sq *ena_com_io_sq;
+ struct bpf_prog *xdp_bpf_prog;
+ struct xdp_rxq_info xdp_rxq;
+ spinlock_t xdp_tx_lock; /* synchronize XDP TX/Redirect traffic */
+ /* Used for rx queues only to point to the xdp tx ring, to
+ * which traffic should be redirected from this rx ring.
+ */
+ struct ena_ring *xdp_ring;
+
+ u16 next_to_use;
+ u16 next_to_clean;
+ u16 rx_copybreak;
+ u16 rx_headroom;
+ u16 qid;
+ u16 mtu;
+ u16 sgl_size;
+
+ /* The maximum header length the device can handle */
+ u8 tx_max_header_size;
+
+ bool disable_meta_caching;
+ u16 no_interrupt_event_cnt;
+
+ /* cpu and NUMA for TPH */
+ int cpu;
+ int numa_node;
+
+ /* number of tx/rx_buffer_info entries */
+ int ring_size;
+
+ enum ena_admin_placement_policy_type tx_mem_queue_type;
+
+ struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
+ u32 smoothed_interval;
+ u32 per_napi_packets;
+ u16 non_empty_napi_events;
+ struct u64_stats_sync syncp;
+ union {
+ struct ena_stats_tx tx_stats;
+ struct ena_stats_rx rx_stats;
+ };
+
+ u8 *push_buf_intermediate_buf;
+ int empty_rx_queue;
+} ____cacheline_aligned;
+
+struct ena_stats_dev {
+ u64 tx_timeout;
+ u64 suspend;
+ u64 resume;
+ u64 wd_expired;
+ u64 interface_up;
+ u64 interface_down;
+ u64 admin_q_pause;
+ u64 rx_drops;
+ u64 tx_drops;
+};
+
+enum ena_flags_t {
+ ENA_FLAG_DEVICE_RUNNING,
+ ENA_FLAG_DEV_UP,
+ ENA_FLAG_LINK_UP,
+ ENA_FLAG_MSIX_ENABLED,
+ ENA_FLAG_TRIGGER_RESET,
+ ENA_FLAG_ONGOING_RESET
+};
+
+/* adapter specific private data structure */
+struct ena_adapter {
+ struct ena_com_dev *ena_dev;
+ /* OS defined structs */
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+
+ /* rx packets shorter than this length will be copied to the skb
+ * header
+ */
+ u32 rx_copybreak;
+ u32 max_mtu;
+
+ u32 num_io_queues;
+ u32 max_num_io_queues;
+
+ int msix_vecs;
+
+ u32 missing_tx_completion_threshold;
+
+ u32 requested_tx_ring_size;
+ u32 requested_rx_ring_size;
+
+ u32 max_tx_ring_size;
+ u32 max_rx_ring_size;
+
+ u32 msg_enable;
+
+ /* large_llq_header_enabled is used for two purposes:
+ * 1. Indicates that large LLQ has been requested.
+ * 2. Indicates whether large LLQ is set or not after device
+ * initialization / configuration.
+ */
+ bool large_llq_header_enabled;
+ bool large_llq_header_supported;
+
+ u16 max_tx_sgl_size;
+ u16 max_rx_sgl_size;
+
+ u8 mac_addr[ETH_ALEN];
+
+ unsigned long keep_alive_timeout;
+ unsigned long missing_tx_completion_to;
+
+ char name[ENA_NAME_MAX_LEN];
+
+ unsigned long flags;
+ /* TX */
+ struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
+ ____cacheline_aligned_in_smp;
+
+ /* RX */
+ struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
+ ____cacheline_aligned_in_smp;
+
+ struct ena_napi ena_napi[ENA_MAX_NUM_IO_QUEUES];
+
+ struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
+
+ /* timer service */
+ struct work_struct reset_task;
+ struct timer_list timer_service;
+
+ bool wd_state;
+ bool dev_up_before_reset;
+ bool disable_meta_caching;
+ unsigned long last_keep_alive_jiffies;
+
+ struct u64_stats_sync syncp;
+ struct ena_stats_dev dev_stats;
+ struct ena_admin_eni_stats eni_stats;
+
+ /* last queue index that was checked for uncompleted tx packets */
+ u32 last_monitored_tx_qid;
+
+ enum ena_regs_reset_reason_types reset_reason;
+
+ struct bpf_prog *xdp_bpf_prog;
+ u32 xdp_first_ring;
+ u32 xdp_num_queues;
+};
+
+void ena_set_ethtool_ops(struct net_device *netdev);
+
+void ena_dump_stats_to_dmesg(struct ena_adapter *adapter);
+
+void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf);
+
+int ena_update_hw_stats(struct ena_adapter *adapter);
+
+int ena_update_queue_params(struct ena_adapter *adapter,
+ u32 new_tx_size,
+ u32 new_rx_size,
+ u32 new_llq_header_len);
+
+int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count);
+
+int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak);
+
+int ena_get_sset_count(struct net_device *netdev, int sset);
+
+static inline void ena_reset_device(struct ena_adapter *adapter,
+ enum ena_regs_reset_reason_types reset_reason)
+{
+ adapter->reset_reason = reset_reason;
+ /* Make sure reset reason is set before triggering the reset */
+ smp_mb__before_atomic();
+ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+}
+
+enum ena_xdp_errors_t {
+ ENA_XDP_ALLOWED = 0,
+ ENA_XDP_CURRENT_MTU_TOO_LARGE,
+ ENA_XDP_NO_ENOUGH_QUEUES,
+};
+
+enum ENA_XDP_ACTIONS {
+ ENA_XDP_PASS = 0,
+ ENA_XDP_TX = BIT(0),
+ ENA_XDP_REDIRECT = BIT(1),
+ ENA_XDP_DROP = BIT(2)
+};
+
+#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT)
+
+static inline bool ena_xdp_present(struct ena_adapter *adapter)
+{
+ return !!adapter->xdp_bpf_prog;
+}
+
+static inline bool ena_xdp_present_ring(struct ena_ring *ring)
+{
+ return !!ring->xdp_bpf_prog;
+}
+
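+/* XDP needs a dedicated TX ring per channel in addition to the regular
+ * queue pair, so a channel count is only legal while twice that count still
+ * fits within the device's IO queue budget:
+ */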
+static inline bool ena_xdp_legal_queue_count(struct ena_adapter *adapter,
+ u32 queues)
+{
+ return 2 * queues <= adapter->max_num_io_queues;
+}
+
+static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter)
+{
+ enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED;
+
+ if (adapter->netdev->mtu > ENA_XDP_MAX_MTU)
+ rc = ENA_XDP_CURRENT_MTU_TOO_LARGE;
+ else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
+ rc = ENA_XDP_NO_ENOUGH_QUEUES;
+
+ return rc;
+}
+
+#endif /* !(ENA_H) */
diff --git a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h
new file mode 100644
index 0000000000..3ecdf29160
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef ENA_PCI_ID_TBL_H_
+#define ENA_PCI_ID_TBL_H_
+
+#ifndef PCI_VENDOR_ID_AMAZON
+#define PCI_VENDOR_ID_AMAZON 0x1d0f
+#endif
+
+#ifndef PCI_DEV_ID_ENA_PF
+#define PCI_DEV_ID_ENA_PF 0x0ec2
+#endif
+
+#ifndef PCI_DEV_ID_ENA_LLQ_PF
+#define PCI_DEV_ID_ENA_LLQ_PF 0x1ec2
+#endif
+
+#ifndef PCI_DEV_ID_ENA_VF
+#define PCI_DEV_ID_ENA_VF 0xec20
+#endif
+
+#ifndef PCI_DEV_ID_ENA_LLQ_VF
+#define PCI_DEV_ID_ENA_LLQ_VF 0xec21
+#endif
+
+#ifndef PCI_DEV_ID_ENA_RESRV0
+#define PCI_DEV_ID_ENA_RESRV0 0x0051
+#endif
+
+#define ENA_PCI_ID_TABLE_ENTRY(devid) \
+ {PCI_DEVICE(PCI_VENDOR_ID_AMAZON, devid)},
+
+static const struct pci_device_id ena_pci_tbl[] = {
+ ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_RESRV0)
+ ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_PF)
+ ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_PF)
+ ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_VF)
+ ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_VF)
+ { }
+};
+
+#endif /* ENA_PCI_ID_TBL_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
new file mode 100644
index 0000000000..1e007a41a5
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+#ifndef _ENA_REGS_H_
+#define _ENA_REGS_H_
+
+enum ena_regs_reset_reason_types {
+ ENA_REGS_RESET_NORMAL = 0,
+ ENA_REGS_RESET_KEEP_ALIVE_TO = 1,
+ ENA_REGS_RESET_ADMIN_TO = 2,
+ ENA_REGS_RESET_MISS_TX_CMPL = 3,
+ ENA_REGS_RESET_INV_RX_REQ_ID = 4,
+ ENA_REGS_RESET_INV_TX_REQ_ID = 5,
+ ENA_REGS_RESET_TOO_MANY_RX_DESCS = 6,
+ ENA_REGS_RESET_INIT_ERR = 7,
+ ENA_REGS_RESET_DRIVER_INVALID_STATE = 8,
+ ENA_REGS_RESET_OS_TRIGGER = 9,
+ ENA_REGS_RESET_OS_NETDEV_WD = 10,
+ ENA_REGS_RESET_SHUTDOWN = 11,
+ ENA_REGS_RESET_USER_TRIGGER = 12,
+ ENA_REGS_RESET_GENERIC = 13,
+ ENA_REGS_RESET_MISS_INTERRUPT = 14,
+};
+
+/* ena_registers offsets */
+
+/* 0 base */
+#define ENA_REGS_VERSION_OFF 0x0
+#define ENA_REGS_CONTROLLER_VERSION_OFF 0x4
+#define ENA_REGS_CAPS_OFF 0x8
+#define ENA_REGS_CAPS_EXT_OFF 0xc
+#define ENA_REGS_AQ_BASE_LO_OFF 0x10
+#define ENA_REGS_AQ_BASE_HI_OFF 0x14
+#define ENA_REGS_AQ_CAPS_OFF 0x18
+#define ENA_REGS_ACQ_BASE_LO_OFF 0x20
+#define ENA_REGS_ACQ_BASE_HI_OFF 0x24
+#define ENA_REGS_ACQ_CAPS_OFF 0x28
+#define ENA_REGS_AQ_DB_OFF 0x2c
+#define ENA_REGS_ACQ_TAIL_OFF 0x30
+#define ENA_REGS_AENQ_CAPS_OFF 0x34
+#define ENA_REGS_AENQ_BASE_LO_OFF 0x38
+#define ENA_REGS_AENQ_BASE_HI_OFF 0x3c
+#define ENA_REGS_AENQ_HEAD_DB_OFF 0x40
+#define ENA_REGS_AENQ_TAIL_OFF 0x44
+#define ENA_REGS_INTR_MASK_OFF 0x4c
+#define ENA_REGS_DEV_CTL_OFF 0x54
+#define ENA_REGS_DEV_STS_OFF 0x58
+#define ENA_REGS_MMIO_REG_READ_OFF 0x5c
+#define ENA_REGS_MMIO_RESP_LO_OFF 0x60
+#define ENA_REGS_MMIO_RESP_HI_OFF 0x64
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF 0x68
+
+/* version register */
+#define ENA_REGS_VERSION_MINOR_VERSION_MASK 0xff
+#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT 8
+#define ENA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00
+
+/* controller_version register */
+#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000
+
+/* caps register */
+#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00
+#define ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16
+#define ENA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000
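+/* A sketch of how a field is typically extracted from the caps register
+ * (illustrative only, not code from this driver):
+ *
+ *	u32 caps = readl(reg_bar + ENA_REGS_CAPS_OFF);
+ *	u32 dma_width = (caps & ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
+ *			ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
+ */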
+
+/* aq_caps register */
+#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000
+
+/* acq_caps register */
+#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xffff0000
+
+/* aenq_caps register */
+#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xffff0000
+
+/* dev_ctl register */
+#define ENA_REGS_DEV_CTL_DEV_RESET_MASK 0x1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2
+#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT 2
+#define ENA_REGS_DEV_CTL_QUIESCENT_MASK 0x4
+#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT 3
+#define ENA_REGS_DEV_CTL_IO_RESUME_MASK 0x8
+#define ENA_REGS_DEV_CTL_RESET_REASON_SHIFT 28
+#define ENA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000
+
+/* dev_sts register */
+#define ENA_REGS_DEV_STS_READY_MASK 0x1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8
+#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4
+#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10
+#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5
+#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT 6
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK 0x40
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT 7
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK 0x80
+
+/* mmio_reg_read register */
+#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000
+
+/* rss_ind_entry_update register */
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK 0xffff
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT 16
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK 0xffff0000
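+/* A hedged sketch of composing an indirection-table update word from the
+ * fields above (illustrative only, not code from this driver; variable
+ * names are hypothetical):
+ *
+ *	u32 val = (cq_idx << ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT) |
+ *		  (entry_idx & ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK);
+ *	writel(val, reg_bar + ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF);
+ */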
+
+#endif /* _ENA_REGS_H_ */