diff options
Diffstat (limited to '')
-rw-r--r-- | debian/patches/features/all/ena/net-ena-allow-queue-allocation-backoff-when-low-on-m.patch | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/debian/patches/features/all/ena/net-ena-allow-queue-allocation-backoff-when-low-on-m.patch b/debian/patches/features/all/ena/net-ena-allow-queue-allocation-backoff-when-low-on-m.patch new file mode 100644 index 000000000..6f902b864 --- /dev/null +++ b/debian/patches/features/all/ena/net-ena-allow-queue-allocation-backoff-when-low-on-m.patch @@ -0,0 +1,323 @@ +From: Sameeh Jubran <sameehj@amazon.com> +Date: Tue, 11 Jun 2019 14:58:08 +0300 +Subject: [PATCH] net: ena: allow queue allocation backoff when low on memory +Origin: https://git.kernel.org/linus/13ca32a69e29f3a0fe72094dd930f312b3f3ee44 +Bug-Debian: https://bugs.debian.org/941291 + +If there is not enough memory to allocate io queues the driver will +try to allocate smaller queues. + +The backoff algorithm is as follows: + +1. Try to allocate TX and RX and if successful. +1.1. return success + +2. Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same). + +3. If TX or RX is smaller than 256 +3.1. return failure. +4. else +4.1. go back to 1. + +Also change the tx_queue_size, rx_queue_size field names in struct +adapter to requested_tx_queue_size and requested_rx_queue_size, and +use RX and TX queue 0 for actual queue sizes. +Explanation: +The original fields were useless as they were simply used to assign +values once from them to each of the queues in the adapter in ena_probe(). +They could simply be deleted. However now that we have a backoff +feature, we have use for them. In case of backoff there is a difference +between the requested queue sizes and the actual sizes. Therefore there +is a need to save the requested queue size for future retries of queue +allocation (for example if allocation failed and then ifdown + ifup was +called we want to start the allocation from the original requested size of +the queues). + +Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com> +Signed-off-by: Sameeh Jubran <sameehj@amazon.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 4 +- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 159 +++++++++++++----- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 6 +- + 3 files changed, 127 insertions(+), 42 deletions(-) + +Index: linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -450,8 +450,8 @@ static void ena_get_ringparam(struct net + + ring->tx_max_pending = adapter->max_tx_ring_size; + ring->rx_max_pending = adapter->max_rx_ring_size; +- ring->tx_pending = adapter->tx_ring_size; +- ring->rx_pending = adapter->rx_ring_size; ++ ring->tx_pending = adapter->tx_ring[0].ring_size; ++ ring->rx_pending = adapter->rx_ring[0].ring_size; + } + + static u32 ena_flow_hash_to_flow_type(u16 hash_fields) +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -182,7 +182,7 @@ static void ena_init_io_rings(struct ena + ena_init_io_rings_common(adapter, rxr, i); + + /* TX specific ring state */ +- txr->ring_size = adapter->tx_ring_size; ++ txr->ring_size = adapter->requested_tx_ring_size; + txr->tx_max_header_size = ena_dev->tx_max_header_size; + txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; + txr->sgl_size = adapter->max_tx_sgl_size; +@@ -190,7 +190,7 @@ static void ena_init_io_rings(struct ena + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); + + /* RX specific ring state */ +- rxr->ring_size = adapter->rx_ring_size; ++ rxr->ring_size = adapter->requested_rx_ring_size; + rxr->rx_copybreak = adapter->rx_copybreak; + rxr->sgl_size = adapter->max_rx_sgl_size; + rxr->smoothed_interval = +@@ -594,7 +594,6 @@ static void 
ena_free_rx_bufs(struct ena_ + + /* ena_refill_all_rx_bufs - allocate all queues Rx buffers + * @adapter: board private structure +- * + */ + static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) + { +@@ -1635,7 +1634,7 @@ static int ena_create_io_tx_queue(struct + ctx.qid = ena_qid; + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; + ctx.msix_vector = msix_vector; +- ctx.queue_size = adapter->tx_ring_size; ++ ctx.queue_size = tx_ring->ring_size; + ctx.numa_node = cpu_to_node(tx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); +@@ -1702,7 +1701,7 @@ static int ena_create_io_rx_queue(struct + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; + ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + ctx.msix_vector = msix_vector; +- ctx.queue_size = adapter->rx_ring_size; ++ ctx.queue_size = rx_ring->ring_size; + ctx.numa_node = cpu_to_node(rx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); +@@ -1749,6 +1748,112 @@ create_err: + return rc; + } + ++static void set_io_rings_size(struct ena_adapter *adapter, ++ int new_tx_size, int new_rx_size) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_queues; i++) { ++ adapter->tx_ring[i].ring_size = new_tx_size; ++ adapter->rx_ring[i].ring_size = new_rx_size; ++ } ++} ++ ++/* This function allows queue allocation to back off when the system is ++ * low on memory. If there is not enough memory to allocate io queues ++ * the driver will try to allocate smaller queues. ++ * ++ * The backoff algorithm is as follows: ++ * 1. Try to allocate TX and RX and if successful. ++ * 1.1. return success ++ * ++ * 2. Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same). ++ * ++ * 3. If the new TX or RX size is smaller than ENA_MIN_RING_SIZE (256) ++ * 3.1. return failure. ++ * 4. else ++ * 4.1. go back to 1. 
++ */ ++static int create_queues_with_size_backoff(struct ena_adapter *adapter) ++{ ++ int rc, cur_rx_ring_size, cur_tx_ring_size; ++ int new_rx_ring_size, new_tx_ring_size; ++ ++ /* current queue sizes might be set to smaller than the requested ++ * ones due to past queue allocation failures. ++ */ ++ set_io_rings_size(adapter, adapter->requested_tx_ring_size, ++ adapter->requested_rx_ring_size); ++ ++ while (1) { ++ rc = ena_setup_all_tx_resources(adapter); ++ if (rc) ++ goto err_setup_tx; ++ ++ rc = ena_create_all_io_tx_queues(adapter); ++ if (rc) ++ goto err_create_tx_queues; ++ ++ rc = ena_setup_all_rx_resources(adapter); ++ if (rc) ++ goto err_setup_rx; ++ ++ rc = ena_create_all_io_rx_queues(adapter); ++ if (rc) ++ goto err_create_rx_queues; ++ ++ return 0; ++ ++err_create_rx_queues: ++ ena_free_all_io_rx_resources(adapter); ++err_setup_rx: ++ ena_destroy_all_tx_queues(adapter); ++err_create_tx_queues: ++ ena_free_all_io_tx_resources(adapter); ++err_setup_tx: ++ if (rc != -ENOMEM) { ++ netif_err(adapter, ifup, adapter->netdev, ++ "Queue creation failed with error code %d\n", ++ rc); ++ return rc; ++ } ++ ++ cur_tx_ring_size = adapter->tx_ring[0].ring_size; ++ cur_rx_ring_size = adapter->rx_ring[0].ring_size; ++ ++ netif_err(adapter, ifup, adapter->netdev, ++ "Not enough memory to create queues with sizes TX=%d, RX=%d\n", ++ cur_tx_ring_size, cur_rx_ring_size); ++ ++ new_tx_ring_size = cur_tx_ring_size; ++ new_rx_ring_size = cur_rx_ring_size; ++ ++ /* Decrease the size of the larger queue, or ++ * decrease both if they are the same size. ++ */ ++ if (cur_rx_ring_size <= cur_tx_ring_size) ++ new_tx_ring_size = cur_tx_ring_size / 2; ++ if (cur_rx_ring_size >= cur_tx_ring_size) ++ new_rx_ring_size = cur_rx_ring_size / 2; ++ ++ if (new_tx_ring_size < ENA_MIN_RING_SIZE || ++ new_rx_ring_size < ENA_MIN_RING_SIZE) { ++ netif_err(adapter, ifup, adapter->netdev, ++ "Queue creation failed with the smallest possible queue size of %d for both queues. 
Not retrying with smaller queues\n", ++ ENA_MIN_RING_SIZE); ++ return rc; ++ } ++ ++ netif_err(adapter, ifup, adapter->netdev, ++ "Retrying queue creation with sizes TX=%d, RX=%d\n", ++ new_tx_ring_size, ++ new_rx_ring_size); ++ ++ set_io_rings_size(adapter, new_tx_ring_size, ++ new_rx_ring_size); ++ } ++} ++ + static int ena_up(struct ena_adapter *adapter) + { + int rc, i; +@@ -1768,25 +1873,9 @@ static int ena_up(struct ena_adapter *ad + if (rc) + goto err_req_irq; + +- /* allocate transmit descriptors */ +- rc = ena_setup_all_tx_resources(adapter); ++ rc = create_queues_with_size_backoff(adapter); + if (rc) +- goto err_setup_tx; +- +- /* allocate receive descriptors */ +- rc = ena_setup_all_rx_resources(adapter); +- if (rc) +- goto err_setup_rx; +- +- /* Create TX queues */ +- rc = ena_create_all_io_tx_queues(adapter); +- if (rc) +- goto err_create_tx_queues; +- +- /* Create RX queues */ +- rc = ena_create_all_io_rx_queues(adapter); +- if (rc) +- goto err_create_rx_queues; ++ goto err_create_queues_with_backoff; + + rc = ena_up_complete(adapter); + if (rc) +@@ -1815,14 +1904,11 @@ static int ena_up(struct ena_adapter *ad + return rc; + + err_up: +- ena_destroy_all_rx_queues(adapter); +-err_create_rx_queues: + ena_destroy_all_tx_queues(adapter); +-err_create_tx_queues: +- ena_free_all_io_rx_resources(adapter); +-err_setup_rx: + ena_free_all_io_tx_resources(adapter); +-err_setup_tx: ++ ena_destroy_all_rx_queues(adapter); ++ ena_free_all_io_rx_resources(adapter); ++err_create_queues_with_backoff: + ena_free_io_irq(adapter); + err_req_irq: + ena_del_napi(adapter); +@@ -3286,17 +3372,14 @@ static int ena_calc_queue_size(struct en + max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size); + max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size); + +- tx_queue_size = min_t(u32, tx_queue_size, max_tx_queue_size); +- rx_queue_size = min_t(u32, rx_queue_size, max_rx_queue_size); ++ tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE, ++ max_tx_queue_size); 
++ rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE, ++ max_rx_queue_size); + + tx_queue_size = rounddown_pow_of_two(tx_queue_size); + rx_queue_size = rounddown_pow_of_two(rx_queue_size); + +- if (unlikely(!rx_queue_size || !tx_queue_size)) { +- dev_err(&ctx->pdev->dev, "Invalid queue size\n"); +- return -EFAULT; +- } +- + ctx->max_tx_queue_size = max_tx_queue_size; + ctx->max_rx_queue_size = max_rx_queue_size; + ctx->tx_queue_size = tx_queue_size; +@@ -3426,8 +3509,8 @@ static int ena_probe(struct pci_dev *pde + adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + adapter->reset_reason = ENA_REGS_RESET_NORMAL; + +- adapter->tx_ring_size = calc_queue_ctx.tx_queue_size; +- adapter->rx_ring_size = calc_queue_ctx.rx_queue_size; ++ adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size; ++ adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size; + adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; + adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; + adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; +Index: linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +=================================================================== +--- linux.orig/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ linux/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -79,6 +79,8 @@ + #define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR)) + + #define ENA_DEFAULT_RING_SIZE (1024) ++#define ENA_MIN_RING_SIZE (256) ++ + + #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) + #define ENA_DEFAULT_RX_COPYBREAK (256 - NET_IP_ALIGN) +@@ -330,8 +332,8 @@ struct ena_adapter { + u32 tx_usecs, rx_usecs; /* interrupt moderation */ + u32 tx_frames, rx_frames; /* interrupt moderation */ + +- u32 tx_ring_size; +- u32 rx_ring_size; ++ u32 requested_tx_ring_size; ++ u32 requested_rx_ring_size; + + u32 max_tx_ring_size; + u32 max_rx_ring_size; |