From: Arthur Kiyanovski
Date: Thu, 11 Oct 2018 11:26:18 +0300
Subject: [PATCH 04/19] net: ena: introduce Low Latency Queues data structures according to ENA spec
Origin: https://git.kernel.org/linus/a7982b8ec947052df6d4467b3a81571f02f528e0

Low Latency Queues (LLQ) allow usage of the device's memory for
descriptors and headers. Such queues decrease processing time since the
data is already located on the device when the driver rings the doorbell.

Signed-off-by: Arthur Kiyanovski
Signed-off-by: David S. Miller
---
 .../net/ethernet/amazon/ena/ena_admin_defs.h | 90 ++++++++++++++++++-
 drivers/net/ethernet/amazon/ena/ena_com.h    | 38 ++++++++
 drivers/net/ethernet/amazon/ena/ena_netdev.c |  6 +-
 3 files changed, 128 insertions(+), 6 deletions(-)

Index: linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
===================================================================
--- linux.orig/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ linux/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -74,6 +74,8 @@ enum ena_admin_aq_feature_id {
 
 	ENA_ADMIN_HW_HINTS = 3,
 
+	ENA_ADMIN_LLQ = 4,
+
 	ENA_ADMIN_RSS_HASH_FUNCTION = 10,
 
 	ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11,
@@ -485,8 +487,85 @@ struct ena_admin_device_attr_feature_des
 	u32 max_mtu;
 };
 
+enum ena_admin_llq_header_location {
+	/* header is in descriptor list */
+	ENA_ADMIN_INLINE_HEADER = 1,
+	/* header in a separate ring, implies 16B descriptor list entry */
+	ENA_ADMIN_HEADER_RING = 2,
+};
+
+enum ena_admin_llq_ring_entry_size {
+	ENA_ADMIN_LIST_ENTRY_SIZE_128B = 1,
+	ENA_ADMIN_LIST_ENTRY_SIZE_192B = 2,
+	ENA_ADMIN_LIST_ENTRY_SIZE_256B = 4,
+};
+
+enum ena_admin_llq_num_descs_before_header {
+	ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_0 = 0,
+	ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1 = 1,
+	ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2 = 2,
+	ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4 = 4,
+	ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8 = 8,
+};
+
+/* A packet descriptor list entry always starts with one or more
+ * descriptors, followed by a header. The rest of the descriptors are
+ * located at the beginning of the subsequent entry. Stride refers to how
+ * the rest of the descriptors are placed. This field is relevant only
+ * for inline header mode
+ */
+enum ena_admin_llq_stride_ctrl {
+	ENA_ADMIN_SINGLE_DESC_PER_ENTRY = 1,
+	ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY = 2,
+};
+
+struct ena_admin_feature_llq_desc {
+	u32 max_llq_num;
+
+	u32 max_llq_depth;
+
+	/* specifies the header locations the device supports. Bitfield of
+	 * enum ena_admin_llq_header_location.
+	 */
+	u16 header_location_ctrl_supported;
+
+	/* the header location the driver selected to use. */
+	u16 header_location_ctrl_enabled;
+
+	/* if inline header is specified - this is the size of the
+	 * descriptor list entry. If header in a separate ring is specified
+	 * - this is the size of the header ring entry. Bitfield of enum
+	 * ena_admin_llq_ring_entry_size, specifying the entry sizes the
+	 * device supports
+	 */
+	u16 entry_size_ctrl_supported;
+
+	/* the entry size the driver selected to use. */
+	u16 entry_size_ctrl_enabled;
+
+	/* valid only if inline header is specified. The first entry
+	 * associated with the packet includes descriptors and the header;
+	 * the rest of the entries are occupied by descriptors. This
+	 * parameter defines the max number of descriptors preceding the
+	 * header in the first entry. The field is a bitfield of enum
+	 * ena_admin_llq_num_descs_before_header and specifies the values
+	 * the device supports
+	 */
+	u16 desc_num_before_header_supported;
+
+	/* the value the driver selected to use */
+	u16 desc_num_before_header_enabled;
+
+	/* valid only if inline was chosen. Bitfield of enum
+	 * ena_admin_llq_stride_ctrl
+	 */
+	u16 descriptors_stride_ctrl_supported;
+
+	/* the stride control the driver selected to use */
+	u16 descriptors_stride_ctrl_enabled;
+};
+
 struct ena_admin_queue_feature_desc {
-	/* including LLQs */
 	u32 max_sq_num;
 
 	u32 max_sq_depth;
@@ -495,9 +574,9 @@ struct ena_admin_queue_feature_desc {
 
 	u32 max_cq_depth;
 
-	u32 max_llq_num;
+	u32 max_legacy_llq_num;
 
-	u32 max_llq_depth;
+	u32 max_legacy_llq_depth;
 
 	u32 max_header_size;
@@ -822,6 +901,8 @@ struct ena_admin_get_feat_resp {
 
 	struct ena_admin_device_attr_feature_desc dev_attr;
 
+	struct ena_admin_feature_llq_desc llq;
+
 	struct ena_admin_queue_feature_desc max_queue;
 
 	struct ena_admin_feature_aenq_desc aenq;
@@ -869,6 +950,9 @@ struct ena_admin_set_feat_cmd {
 
 		/* rss indirection table */
 		struct ena_admin_feature_rss_ind_table ind_table;
+
+		/* LLQ configuration */
+		struct ena_admin_feature_llq_desc llq;
 	} u;
 };

Index: linux/drivers/net/ethernet/amazon/ena/ena_com.h
===================================================================
--- linux.orig/drivers/net/ethernet/amazon/ena/ena_com.h
+++ linux/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -108,6 +108,14 @@ enum ena_intr_moder_level {
 	ENA_INTR_MAX_NUM_OF_LEVELS,
 };
 
+struct ena_llq_configurations {
+	enum ena_admin_llq_header_location llq_header_location;
+	enum ena_admin_llq_ring_entry_size llq_ring_entry_size;
+	enum ena_admin_llq_stride_ctrl llq_stride_ctrl;
+	enum ena_admin_llq_num_descs_before_header llq_num_decs_before_header;
+	u16 llq_ring_entry_size_value;
+};
+
 struct ena_intr_moder_entry {
 	unsigned int intr_moder_interval;
 	unsigned int pkts_per_interval;
@@ -142,6 +150,15 @@ struct ena_com_tx_meta {
 	u16 l4_hdr_len; /* In words */
 };
 
+struct ena_com_llq_info {
+	u16 header_location_ctrl;
+	u16 desc_stride_ctrl;
+	u16 desc_list_entry_size_ctrl;
+	u16 desc_list_entry_size;
+	u16 descs_num_before_header;
+	u16 descs_per_entry;
+};
+
 struct ena_com_io_cq {
 	struct ena_com_io_desc_addr cdesc_addr;
 
@@ -179,6 +196,20 @@ struct ena_com_io_cq {
 
 } ____cacheline_aligned;
 
+struct ena_com_io_bounce_buffer_control {
+	u8 *base_buffer;
+	u16 next_to_use;
+	u16 buffer_size;
+	u16 buffers_num;  /* Must be a power of 2 */
+};
+
+/* This struct keeps track of the current location of the next llq entry */
+struct ena_com_llq_pkt_ctrl {
+	u8 *curr_bounce_buf;
+	u16 idx;
+	u16 descs_left_in_line;
+};
+
 struct ena_com_io_sq {
 	struct ena_com_io_desc_addr desc_addr;
 
@@ -190,6 +221,9 @@ struct ena_com_io_sq {
 	u32 msix_vector;
 	struct ena_com_tx_meta cached_tx_meta;
+	struct ena_com_llq_info llq_info;
+	struct ena_com_llq_pkt_ctrl llq_buf_ctrl;
+	struct ena_com_io_bounce_buffer_control bounce_buf_ctrl;
 
 	u16 q_depth;
 	u16 qid;
@@ -197,6 +231,7 @@ struct ena_com_io_sq {
 	u16 idx;
 	u16 tail;
 	u16 next_to_comp;
+	u16 llq_last_copy_tail;
 	u32 tx_max_header_size;
 	u8 phase;
 	u8 desc_entry_size;
@@ -334,6 +369,8 @@ struct ena_com_dev {
 	u16 intr_delay_resolution;
 	u32 intr_moder_tx_interval;
 	struct ena_intr_moder_entry *intr_moder_tbl;
+
+	struct ena_com_llq_info llq_info;
 };
 
 struct ena_com_dev_get_features_ctx {
@@ -342,6 +379,7 @@ struct ena_com_dev_get_features_ctx {
 	struct ena_admin_feature_aenq_desc aenq;
 	struct ena_admin_feature_offload_desc offload;
 	struct ena_admin_ena_hw_hints hw_hints;
+	struct ena_admin_feature_llq_desc llq;
 };
 
 struct ena_com_create_io_ctx {
Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c
===================================================================
--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2959,7 +2959,7 @@ static int ena_calc_io_queue_num(struct 
 
 	/* In case of LLQ use the llq number in the get feature cmd */
 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
-		io_sq_num = get_feat_ctx->max_queues.max_llq_num;
+		io_sq_num = get_feat_ctx->max_queues.max_legacy_llq_num;
 
 		if (io_sq_num == 0) {
 			dev_err(&pdev->dev,
@@ -2995,7 +2995,7 @@ static void ena_set_push_mode(struct pci
 	has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
 
 	/* Enable push mode if device supports LLQ */
-	if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0))
+	if (has_mem_bar && get_feat_ctx->max_queues.max_legacy_llq_num > 0)
 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
 	else
 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
@@ -3131,7 +3131,7 @@ static int ena_calc_queue_size(struct pc
 
 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
 		queue_size = min_t(u32, queue_size,
-				   get_feat_ctx->max_queues.max_legacy_llq_depth);
+				   get_feat_ctx->max_queues.max_legacy_llq_depth);
 
 	queue_size = rounddown_pow_of_two(queue_size);
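
Note (reviewer commentary, not part of the patch): this change only introduces the
LLQ data structures; the negotiation logic that consumes them lands later in the
series. To make the supported/enabled pattern above concrete, here is a minimal C
sketch of how a driver could pick LLQ defaults and validate them against the
device-advertised bitfields. The function name and the chosen defaults are
hypothetical, not taken from this series; only the structures and enums added by
this patch are assumed.

/* Hypothetical example - not part of this patch. Each *_supported field
 * in struct ena_admin_feature_llq_desc is a bitfield of its matching
 * enum (whose values are distinct bits), so a chosen value can be
 * validated with a single AND against the device-advertised mask.
 */
#include <linux/errno.h>
#include "ena_com.h"

static int ena_example_select_llq_cfg(struct ena_admin_feature_llq_desc *llq_desc,
				      struct ena_llq_configurations *cfg)
{
	/* Hypothetical defaults: inline headers, 128B list entries,
	 * multiple descriptors per entry, up to 2 descriptors before
	 * the header.
	 */
	cfg->llq_header_location = ENA_ADMIN_INLINE_HEADER;
	cfg->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
	cfg->llq_ring_entry_size_value = 128;
	cfg->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
	cfg->llq_num_decs_before_header =
		ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;

	if (!(llq_desc->header_location_ctrl_supported &
	      cfg->llq_header_location))
		return -EINVAL;

	if (!(llq_desc->entry_size_ctrl_supported & cfg->llq_ring_entry_size))
		return -EINVAL;

	/* Stride control and descriptors-before-header are meaningful
	 * only in inline header mode. A chosen value of
	 * ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_0 (= 0) has no bit to
	 * test and would need special-casing.
	 */
	if (cfg->llq_header_location == ENA_ADMIN_INLINE_HEADER) {
		if (!(llq_desc->descriptors_stride_ctrl_supported &
		      cfg->llq_stride_ctrl))
			return -EINVAL;

		if (!(llq_desc->desc_num_before_header_supported &
		      cfg->llq_num_decs_before_header))
			return -EINVAL;
	}

	return 0;
}

On success the driver would echo the chosen values back through the *_enabled
fields of the set-feature command (struct ena_admin_set_feat_cmd, u.llq). Pairing
a "supported" bitmask with an "enabled" value per field is what lets the device
and driver negotiate each LLQ parameter independently.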