diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/net/ethernet/sfc/ef100_tx.c | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/net/ethernet/sfc/ef100_tx.c')
-rw-r--r-- | drivers/net/ethernet/sfc/ef100_tx.c | 511 |
1 files changed, 511 insertions, 0 deletions
diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c new file mode 100644 index 000000000..849e5555b --- /dev/null +++ b/drivers/net/ethernet/sfc/ef100_tx.c @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * Copyright 2019-2020 Xilinx Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include <net/ip6_checksum.h> + +#include "net_driver.h" +#include "tx_common.h" +#include "nic_common.h" +#include "mcdi_functions.h" +#include "ef100_regs.h" +#include "io.h" +#include "ef100_tx.h" +#include "ef100_nic.h" + +int ef100_tx_probe(struct efx_tx_queue *tx_queue) +{ + /* Allocate an extra descriptor for the QMDA status completion entry */ + return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf, + (tx_queue->ptr_mask + 2) * + sizeof(efx_oword_t), + GFP_KERNEL); +} + +void ef100_tx_init(struct efx_tx_queue *tx_queue) +{ + /* must be the inverse of lookup in efx_get_tx_channel */ + tx_queue->core_txq = + netdev_get_tx_queue(tx_queue->efx->net_dev, + tx_queue->channel->channel - + tx_queue->efx->tx_channel_offset); + + /* This value is purely documentational; as EF100 never passes through + * the switch statement in tx.c:__efx_enqueue_skb(), that switch does + * not handle case 3. EF100's TSOv3 descriptors are generated by + * ef100_make_tso_desc(). + * Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2. + */ + tx_queue->tso_version = 3; + if (efx_mcdi_tx_init(tx_queue)) + netdev_WARN(tx_queue->efx->net_dev, + "failed to initialise TXQ %d\n", tx_queue->queue); +} + +static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) +{ + struct efx_nic *efx = tx_queue->efx; + struct ef100_nic_data *nic_data; + struct efx_tx_buffer *buffer; + size_t header_len; + u32 mss; + + nic_data = efx->nic_data; + + if (!skb_is_gso_tcp(skb)) + return false; + if (!(efx->net_dev->features & NETIF_F_TSO)) + return false; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(mss < 4)) { + WARN_ONCE(1, "MSS of %u is too small for TSO\n", mss); + return false; + } + + header_len = efx_tx_tso_header_length(skb); + if (header_len > nic_data->tso_max_hdr_len) + return false; + + if (skb_shinfo(skb)->gso_segs > nic_data->tso_max_payload_num_segs) { + /* net_dev->gso_max_segs should've caught this */ + WARN_ON_ONCE(1); + return false; + } + + if (skb->data_len / mss > nic_data->tso_max_frames) + return false; + + /* net_dev->gso_max_size should've caught this */ + if (WARN_ON_ONCE(skb->data_len > nic_data->tso_max_payload_len)) + return false; + + /* Reserve an empty buffer for the TSO V3 descriptor. + * Convey the length of the header since we already know it. + */ + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + buffer->flags = EFX_TX_BUF_TSO_V3 | EFX_TX_BUF_CONT; + buffer->len = header_len; + buffer->unmap_len = 0; + buffer->skb = skb; + ++tx_queue->insert_count; + return true; +} + +static efx_oword_t *ef100_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index) +{ + if (likely(tx_queue->txd.buf.addr)) + return ((efx_oword_t *)tx_queue->txd.buf.addr) + index; + else + return NULL; +} + +static void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue) +{ + unsigned int write_ptr; + efx_dword_t reg; + + tx_queue->xmit_pending = false; + + if (unlikely(tx_queue->notify_count == tx_queue->write_count)) + return; + + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + /* The write pointer goes into the high word */ + EFX_POPULATE_DWORD_1(reg, ERF_GZ_TX_RING_PIDX, write_ptr); + efx_writed_page(tx_queue->efx, ®, + ER_GZ_TX_RING_DOORBELL, tx_queue->queue); + tx_queue->notify_count = tx_queue->write_count; +} + +static void ef100_tx_push_buffers(struct efx_tx_queue *tx_queue) +{ + ef100_notify_tx_desc(tx_queue); + ++tx_queue->pushes; +} + +static void ef100_set_tx_csum_partial(const struct sk_buff *skb, + struct efx_tx_buffer *buffer, efx_oword_t *txd) +{ + efx_oword_t csum; + int csum_start; + + if (!skb || skb->ip_summed != CHECKSUM_PARTIAL) + return; + + /* skb->csum_start has the offset from head, but we need the offset + * from data. + */ + csum_start = skb_checksum_start_offset(skb); + EFX_POPULATE_OWORD_3(csum, + ESF_GZ_TX_SEND_CSO_PARTIAL_EN, 1, + ESF_GZ_TX_SEND_CSO_PARTIAL_START_W, + csum_start >> 1, + ESF_GZ_TX_SEND_CSO_PARTIAL_CSUM_W, + skb->csum_offset >> 1); + EFX_OR_OWORD(*txd, *txd, csum); +} + +static void ef100_set_tx_hw_vlan(const struct sk_buff *skb, efx_oword_t *txd) +{ + u16 vlan_tci = skb_vlan_tag_get(skb); + efx_oword_t vlan; + + EFX_POPULATE_OWORD_2(vlan, + ESF_GZ_TX_SEND_VLAN_INSERT_EN, 1, + ESF_GZ_TX_SEND_VLAN_INSERT_TCI, vlan_tci); + EFX_OR_OWORD(*txd, *txd, vlan); +} + +static void ef100_make_send_desc(struct efx_nic *efx, + const struct sk_buff *skb, + struct efx_tx_buffer *buffer, efx_oword_t *txd, + unsigned int segment_count) +{ + /* TX send descriptor */ + EFX_POPULATE_OWORD_3(*txd, + ESF_GZ_TX_SEND_NUM_SEGS, segment_count, + ESF_GZ_TX_SEND_LEN, buffer->len, + ESF_GZ_TX_SEND_ADDR, buffer->dma_addr); + + if (likely(efx->net_dev->features & NETIF_F_HW_CSUM)) + ef100_set_tx_csum_partial(skb, buffer, txd); + if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX && + skb && skb_vlan_tag_present(skb)) + ef100_set_tx_hw_vlan(skb, txd); +} + +static void ef100_make_tso_desc(struct efx_nic *efx, + const struct sk_buff *skb, + struct efx_tx_buffer *buffer, efx_oword_t *txd, + unsigned int segment_count) +{ + bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; + unsigned int len, ip_offset, tcp_offset, payload_segs; + u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; + unsigned int outer_ip_offset, outer_l4_offset; + u16 vlan_tci = skb_vlan_tag_get(skb); + u32 mss = skb_shinfo(skb)->gso_size; + bool encap = skb->encapsulation; + bool udp_encap = false; + u16 vlan_enable = 0; + struct tcphdr *tcp; + bool outer_csum; + u32 paylen; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) + mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; + if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX) + vlan_enable = skb_vlan_tag_present(skb); + + len = skb->len - buffer->len; + /* We use 1 for the TSO descriptor and 1 for the header */ + payload_segs = segment_count - 2; + if (encap) { + outer_ip_offset = skb_network_offset(skb); + outer_l4_offset = skb_transport_offset(skb); + ip_offset = skb_inner_network_offset(skb); + tcp_offset = skb_inner_transport_offset(skb); + if (skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)) + udp_encap = true; + } else { + ip_offset = skb_network_offset(skb); + tcp_offset = skb_transport_offset(skb); + outer_ip_offset = outer_l4_offset = 0; + } + outer_csum = skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM; + + /* subtract TCP payload length from inner checksum */ + tcp = (void *)skb->data + tcp_offset; + paylen = skb->len - tcp_offset; + csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); + + EFX_POPULATE_OWORD_19(*txd, + ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO, + ESF_GZ_TX_TSO_MSS, mss, + ESF_GZ_TX_TSO_HDR_NUM_SEGS, 1, + ESF_GZ_TX_TSO_PAYLOAD_NUM_SEGS, payload_segs, + ESF_GZ_TX_TSO_HDR_LEN_W, buffer->len >> 1, + ESF_GZ_TX_TSO_PAYLOAD_LEN, len, + ESF_GZ_TX_TSO_CSO_OUTER_L4, outer_csum, + ESF_GZ_TX_TSO_CSO_INNER_L4, 1, + ESF_GZ_TX_TSO_INNER_L3_OFF_W, ip_offset >> 1, + ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1, + ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid, + ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, + ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1, + ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1, + ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial, + ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial, + ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid : + ESE_GZ_TX_DESC_IP4_ID_NO_OP, + ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable, + ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci + ); +} + +static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue, + const struct sk_buff *skb, + unsigned int segment_count, + struct efx_rep *efv) +{ + unsigned int old_write_count = tx_queue->write_count; + unsigned int new_write_count = old_write_count; + struct efx_tx_buffer *buffer; + unsigned int next_desc_type; + unsigned int write_ptr; + efx_oword_t *txd; + unsigned int nr_descs = tx_queue->insert_count - old_write_count; + + if (unlikely(nr_descs == 0)) + return; + + if (segment_count) + next_desc_type = ESE_GZ_TX_DESC_TYPE_TSO; + else + next_desc_type = ESE_GZ_TX_DESC_TYPE_SEND; + + if (unlikely(efv)) { + /* Create TX override descriptor */ + write_ptr = new_write_count & tx_queue->ptr_mask; + txd = ef100_tx_desc(tx_queue, write_ptr); + ++new_write_count; + + tx_queue->packet_write_count = new_write_count; + EFX_POPULATE_OWORD_3(*txd, + ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_PREFIX, + ESF_GZ_TX_PREFIX_EGRESS_MPORT, efv->mport, + ESF_GZ_TX_PREFIX_EGRESS_MPORT_EN, 1); + nr_descs--; + } + + /* if it's a raw write (such as XDP) then always SEND single frames */ + if (!skb) + nr_descs = 1; + + do { + write_ptr = new_write_count & tx_queue->ptr_mask; + buffer = &tx_queue->buffer[write_ptr]; + txd = ef100_tx_desc(tx_queue, write_ptr); + ++new_write_count; + + /* Create TX descriptor ring entry */ + tx_queue->packet_write_count = new_write_count; + + switch (next_desc_type) { + case ESE_GZ_TX_DESC_TYPE_SEND: + ef100_make_send_desc(tx_queue->efx, skb, + buffer, txd, nr_descs); + break; + case ESE_GZ_TX_DESC_TYPE_TSO: + /* TX TSO descriptor */ + WARN_ON_ONCE(!(buffer->flags & EFX_TX_BUF_TSO_V3)); + ef100_make_tso_desc(tx_queue->efx, skb, + buffer, txd, nr_descs); + break; + default: + /* TX segment descriptor */ + EFX_POPULATE_OWORD_3(*txd, + ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_SEG, + ESF_GZ_TX_SEG_LEN, buffer->len, + ESF_GZ_TX_SEG_ADDR, buffer->dma_addr); + } + /* if it's a raw write (such as XDP) then always SEND */ + next_desc_type = skb ? ESE_GZ_TX_DESC_TYPE_SEG : + ESE_GZ_TX_DESC_TYPE_SEND; + /* mark as an EFV buffer if applicable */ + if (unlikely(efv)) + buffer->flags |= EFX_TX_BUF_EFV; + + } while (new_write_count != tx_queue->insert_count); + + wmb(); /* Ensure descriptors are written before they are fetched */ + + tx_queue->write_count = new_write_count; + + /* The write_count above must be updated before reading + * channel->holdoff_doorbell to avoid a race with the + * completion path, so ensure these operations are not + * re-ordered. This also flushes the update of write_count + * back into the cache. + */ + smp_mb(); +} + +void ef100_tx_write(struct efx_tx_queue *tx_queue) +{ + ef100_tx_make_descriptors(tx_queue, NULL, 0, NULL); + ef100_tx_push_buffers(tx_queue); +} + +int ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event) +{ + unsigned int tx_done = + EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_NUM_DESC); + unsigned int qlabel = + EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_Q_LABEL); + struct efx_tx_queue *tx_queue = + efx_channel_get_tx_queue(channel, qlabel); + unsigned int tx_index = (tx_queue->read_count + tx_done - 1) & + tx_queue->ptr_mask; + + return efx_xmit_done(tx_queue, tx_index); +} + +/* Add a socket buffer to a TX queue + * + * You must hold netif_tx_lock() to call this function. + * + * Returns 0 on success, error code otherwise. In case of an error this + * function will free the SKB. + */ +netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue, + struct sk_buff *skb) +{ + return __ef100_enqueue_skb(tx_queue, skb, NULL); +} + +int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + struct efx_rep *efv) +{ + unsigned int old_insert_count = tx_queue->insert_count; + struct efx_nic *efx = tx_queue->efx; + bool xmit_more = netdev_xmit_more(); + unsigned int fill_level; + unsigned int segments; + int rc; + + if (!tx_queue->buffer || !tx_queue->ptr_mask) { + netif_stop_queue(efx->net_dev); + dev_kfree_skb_any(skb); + return -ENODEV; + } + + segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0; + if (segments == 1) + segments = 0; /* Don't use TSO/GSO for a single segment. */ + if (segments && !ef100_tx_can_tso(tx_queue, skb)) { + rc = efx_tx_tso_fallback(tx_queue, skb); + tx_queue->tso_fallbacks++; + if (rc) + goto err; + else + return 0; + } + + if (unlikely(efv)) { + struct efx_tx_buffer *buffer = __efx_tx_queue_get_insert_buffer(tx_queue); + + /* Drop representor packets if the queue is stopped. + * We currently don't assert backoff to representors so this is + * to make sure representor traffic can't starve the main + * net device. + * And, of course, if there are no TX descriptors left. + */ + if (netif_tx_queue_stopped(tx_queue->core_txq) || + unlikely(efx_tx_buffer_in_use(buffer))) { + atomic64_inc(&efv->stats.tx_errors); + rc = -ENOSPC; + goto err; + } + + /* Also drop representor traffic if it could cause us to + * stop the queue. If we assert backoff and we haven't + * received traffic on the main net device recently then the + * TX watchdog can go off erroneously. + */ + fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); + fill_level += efx_tx_max_skb_descs(efx); + if (fill_level > efx->txq_stop_thresh) { + struct efx_tx_queue *txq2; + + /* Refresh cached fill level and re-check */ + efx_for_each_channel_tx_queue(txq2, tx_queue->channel) + txq2->old_read_count = READ_ONCE(txq2->read_count); + + fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); + fill_level += efx_tx_max_skb_descs(efx); + if (fill_level > efx->txq_stop_thresh) { + atomic64_inc(&efv->stats.tx_errors); + rc = -ENOSPC; + goto err; + } + } + + buffer->flags = EFX_TX_BUF_OPTION | EFX_TX_BUF_EFV; + tx_queue->insert_count++; + } + + /* Map for DMA and create descriptors */ + rc = efx_tx_map_data(tx_queue, skb, segments); + if (rc) + goto err; + ef100_tx_make_descriptors(tx_queue, skb, segments, efv); + + fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); + if (fill_level > efx->txq_stop_thresh) { + struct efx_tx_queue *txq2; + + /* Because of checks above, representor traffic should + * not be able to stop the queue. + */ + WARN_ON(efv); + + netif_tx_stop_queue(tx_queue->core_txq); + /* Re-read after a memory barrier in case we've raced with + * the completion path. Otherwise there's a danger we'll never + * restart the queue if all completions have just happened. + */ + smp_mb(); + efx_for_each_channel_tx_queue(txq2, tx_queue->channel) + txq2->old_read_count = READ_ONCE(txq2->read_count); + fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); + if (fill_level < efx->txq_stop_thresh) + netif_tx_start_queue(tx_queue->core_txq); + } + + tx_queue->xmit_pending = true; + + /* If xmit_more then we don't need to push the doorbell, unless there + * are 256 descriptors already queued in which case we have to push to + * ensure we never push more than 256 at once. + * + * Always push for representor traffic, and don't account it to parent + * PF netdevice's BQL. + */ + if (unlikely(efv) || + __netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) || + tx_queue->write_count - tx_queue->notify_count > 255) + ef100_tx_push_buffers(tx_queue); + + if (segments) { + tx_queue->tso_bursts++; + tx_queue->tso_packets += segments; + tx_queue->tx_packets += segments; + } else { + tx_queue->tx_packets++; + } + return 0; + +err: + efx_enqueue_unwind(tx_queue, old_insert_count); + if (!IS_ERR_OR_NULL(skb)) + dev_kfree_skb_any(skb); + + /* If we're not expecting another transmit and we had something to push + * on this queue then we need to push here to get the previous packets + * out. We only enter this branch from before the xmit_more handling + * above, so xmit_pending still refers to the old state. + */ + if (tx_queue->xmit_pending && !xmit_more) + ef100_tx_push_buffers(tx_queue); + return rc; +} |