Diffstat (limited to 'include/net/libeth/rx.h')
-rw-r--r--  include/net/libeth/rx.h  242
1 file changed, 242 insertions, 0 deletions
diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
new file mode 100644
index 0000000000..f29ea3e34c
--- /dev/null
+++ b/include/net/libeth/rx.h
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2024 Intel Corporation */
+
+#ifndef __LIBETH_RX_H
+#define __LIBETH_RX_H
+
+#include <linux/if_vlan.h>
+
+#include <net/page_pool/helpers.h>
+#include <net/xdp.h>
+
+/* Rx buffer management */
+
+/* Space reserved in front of each frame */
+#define LIBETH_SKB_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
+/* Maximum headroom for worst-case calculations */
+#define LIBETH_MAX_HEADROOM	LIBETH_SKB_HEADROOM
+/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
+#define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
+
+/* Always use order-0 pages */
+#define LIBETH_RX_PAGE_ORDER	0
+/* Pick a sane buffer stride and align to a cacheline boundary */
+#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128)
+/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
+#define LIBETH_RX_PAGE_LEN(hr)						\
+	ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),		\
+		   LIBETH_RX_BUF_STRIDE)
+
+/**
+ * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
+ * @page: page holding the buffer
+ * @offset: offset from the page start (to the headroom)
+ * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
+ *
+ * Depending on the MTU, the API switches between the one-page-per-frame and
+ * shared page models (to conserve memory on bigger-page platforms). In case
+ * of the former, @offset is always 0 and @truesize is always ``PAGE_SIZE``.
+ */
+struct libeth_fqe {
+	struct page		*page;
+	u32			offset;
+	u32			truesize;
+} __aligned_largest;
+
+/**
+ * struct libeth_fq - structure representing a buffer (fill) queue
+ * @fp: hotpath part of the structure
+ * @pp: &page_pool for buffer management
+ * @fqes: array of Rx buffers
+ * @truesize: size to allocate per buffer, w/ overhead
+ * @count: number of descriptors/buffers the queue has
+ * @buf_len: HW-writeable length per buffer
+ * @nid: ID of the closest NUMA node with memory
+ */
+struct libeth_fq {
+	struct_group_tagged(libeth_fq_fp, fp,
+		struct page_pool	*pp;
+		struct libeth_fqe	*fqes;
+
+		u32			truesize;
+		u32			count;
+	);
+
+	/* Cold fields */
+	u32			buf_len;
+	int			nid;
+};
+
+int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
+void libeth_rx_fq_destroy(struct libeth_fq *fq);
+
+/**
+ * libeth_rx_alloc - allocate a new Rx buffer
+ * @fq: fill queue to allocate for
+ * @i: index of the buffer within the queue
+ *
+ * Return: DMA address to be passed to HW for Rx on successful allocation,
+ * ``DMA_MAPPING_ERROR`` otherwise.
+ */
+static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
+{
+	struct libeth_fqe *buf = &fq->fqes[i];
+
+	buf->truesize = fq->truesize;
+	buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize);
+	if (unlikely(!buf->page))
+		return DMA_MAPPING_ERROR;
+
+	return page_pool_get_dma_addr(buf->page) + buf->offset +
+	       fq->pp->p.offset;
+}
+
+void libeth_rx_recycle_slow(struct page *page);
+
+/**
+ * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
+ * @fqe: buffer to process
+ * @len: frame length from the descriptor
+ *
+ * Process the buffer after it's written by HW. The regular path is to
+ * synchronize DMA for CPU, but in case of no data it will be immediately
+ * recycled back to its PP.
+ *
+ * Return: true when there's data to process, false otherwise.
+ */
+static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
+					   u32 len)
+{
+	struct page *page = fqe->page;
+
+	/* Very rare, but possible case. The most common reason:
+	 * the last fragment contained FCS only, which was then
+	 * stripped by the HW.
+	 */
+	if (unlikely(!len)) {
+		libeth_rx_recycle_slow(page);
+		return false;
+	}
+
+	page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len);
+
+	return true;
+}
+
+/* Converting abstract packet type numbers into a software structure with
+ * the packet parameters to do O(1) lookup on Rx.
+ */
+
+enum {
+	LIBETH_RX_PT_OUTER_L2		= 0U,
+	LIBETH_RX_PT_OUTER_IPV4,
+	LIBETH_RX_PT_OUTER_IPV6,
+};
+
+enum {
+	LIBETH_RX_PT_NOT_FRAG		= 0U,
+	LIBETH_RX_PT_FRAG,
+};
+
+enum {
+	LIBETH_RX_PT_TUNNEL_IP_NONE	= 0U,
+	LIBETH_RX_PT_TUNNEL_IP_IP,
+	LIBETH_RX_PT_TUNNEL_IP_GRENAT,
+	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
+	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
+};
+
+enum {
+	LIBETH_RX_PT_TUNNEL_END_NONE	= 0U,
+	LIBETH_RX_PT_TUNNEL_END_IPV4,
+	LIBETH_RX_PT_TUNNEL_END_IPV6,
+};
+
+enum {
+	LIBETH_RX_PT_INNER_NONE		= 0U,
+	LIBETH_RX_PT_INNER_UDP,
+	LIBETH_RX_PT_INNER_TCP,
+	LIBETH_RX_PT_INNER_SCTP,
+	LIBETH_RX_PT_INNER_ICMP,
+	LIBETH_RX_PT_INNER_TIMESYNC,
+};
+
+#define LIBETH_RX_PT_PAYLOAD_NONE	PKT_HASH_TYPE_NONE
+#define LIBETH_RX_PT_PAYLOAD_L2	PKT_HASH_TYPE_L2
+#define LIBETH_RX_PT_PAYLOAD_L3	PKT_HASH_TYPE_L3
+#define LIBETH_RX_PT_PAYLOAD_L4	PKT_HASH_TYPE_L4
+
+struct libeth_rx_pt {
+	u32				outer_ip:2;
+	u32				outer_frag:1;
+	u32				tunnel_type:3;
+	u32				tunnel_end_prot:2;
+	u32				tunnel_end_frag:1;
+	u32				inner_prot:3;
+	enum pkt_hash_types		payload_layer:2;
+
+	u32				pad:2;
+	enum xdp_rss_hash_type		hash_type:16;
+};
+
+void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
+
+/**
+ * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
+ * @pt: packet type params
+ *
+ * Wrapper to compile out the IPv6 code from the drivers when not supported
+ * by the kernel.
+ *
+ * Return: @pt.outer_ip, or a stub for IPv6 when IPv6 is not compiled in.
+ */
+static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
+{
+#if !IS_ENABLED(CONFIG_IPV6)
+	switch (pt.outer_ip) {
+	case LIBETH_RX_PT_OUTER_IPV4:
+		return LIBETH_RX_PT_OUTER_IPV4;
+	default:
+		return LIBETH_RX_PT_OUTER_L2;
+	}
+#else
+	return pt.outer_ip;
+#endif
+}
+
+/* libeth_rx_pt_has_*() can be used to quickly check whether the HW metadata
+ * is available to avoid further expensive processing such as descriptor
+ * reads. They already check for the corresponding netdev feature to be
+ * enabled, thus can be used as drop-in replacements for such checks.
+ */
+
+static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
+					     struct libeth_rx_pt pt)
+{
+	/* Non-zero _INNER* is only possible when _OUTER_IPV* is set, so it
+	 * is enough to check only the L4 type.
+	 */
+	return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
+		      (dev->features & NETIF_F_RXCSUM));
+}
+
+static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
+					 struct libeth_rx_pt pt)
+{
+	return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
+		      (dev->features & NETIF_F_RXHASH));
+}
+
+/**
+ * libeth_rx_pt_set_hash - fill in skb hash value based on the PT
+ * @skb: skb to fill the hash in
+ * @hash: 32-bit hash value from the descriptor
+ * @pt: packet type
+ */
+static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
+					 struct libeth_rx_pt pt)
+{
+	skb_set_hash(skb, hash, pt.payload_layer);
+}
+
+#endif /* __LIBETH_RX_H */
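
How a driver is expected to drive this API, as a minimal sketch. The cold
fields of struct libeth_fq (@count, @buf_len, @nid) are assumed to be filled
by the driver before libeth_rx_fq_create(), which the field descriptions
suggest but the header does not spell out; struct my_rx_queue and the
2048-byte buffer length are hypothetical, not part of libeth.

#include <linux/numa.h>

#include <net/libeth/rx.h>

/* Hypothetical per-queue driver context, not part of libeth */
struct my_rx_queue {
	struct libeth_fq	fq;
	u32			next_to_use;
};

static int my_rxq_create(struct my_rx_queue *rxq, struct napi_struct *napi,
			 u32 count)
{
	/* Cold fields are inputs to libeth_rx_fq_create() */
	rxq->fq.count = count;		/* one buffer per descriptor */
	rxq->fq.buf_len = 2048;		/* desired HW-writeable length */
	rxq->fq.nid = NUMA_NO_NODE;	/* or the closest node with memory */

	return libeth_rx_fq_create(&rxq->fq, napi);
}

static void my_rxq_destroy(struct my_rx_queue *rxq)
{
	libeth_rx_fq_destroy(&rxq->fq);
}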
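
Refill then follows directly from libeth_rx_alloc(): it programs fq->fqes[i]
and returns the DMA address to write into the HW buffer descriptor. Note that
only the hotpath group (&fq.fp) is passed. A sketch continuing the structure
above; the descriptor layout and the doorbell are hypothetical:

/* Hypothetical HW Rx buffer descriptor */
struct my_rx_desc {
	__le64	addr;
	__le64	rsvd;
};

static u32 my_rxq_refill(struct my_rx_queue *rxq, struct my_rx_desc *ring,
			 u32 to_fill)
{
	u32 i = rxq->next_to_use, done;

	for (done = 0; done < to_fill; done++) {
		dma_addr_t dma;

		/* Sets up fq->fqes[i] and returns the address to give HW */
		dma = libeth_rx_alloc(&rxq->fq.fp, i);
		if (dma == DMA_MAPPING_ERROR)
			break;		/* pool exhausted, retry later */

		ring[i].addr = cpu_to_le64(dma);

		if (++i == rxq->fq.count)
			i = 0;
	}

	rxq->next_to_use = i;
	/* write @i to the HW tail/doorbell register here */

	return done;
}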
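
On the completion side, libeth_rx_sync_for_cpu() is the single entry point
per buffer: it either syncs the written part for the CPU or, for a
zero-length writeback, recycles the buffer and tells the caller to skip it.
A sketch of attaching one fragment to an skb; the pp->p.offset term mirrors
the headroom that libeth_rx_alloc() folds into the DMA address, so treating
it as the start of HW data is an inference from that helper, not something
the header states:

static bool my_rx_add_frag(struct my_rx_queue *rxq, struct sk_buff *skb,
			   u32 i, u32 len)
{
	const struct libeth_fqe *fqe = &rxq->fq.fqes[i];
	struct page *page = fqe->page;

	/* false: FCS-only (or otherwise empty) fragment; it has already
	 * been recycled and must not be attached to the skb
	 */
	if (!libeth_rx_sync_for_cpu(fqe, len))
		return false;

	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
			fqe->offset + page->pp->p.offset, len,
			fqe->truesize);

	return true;
}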
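
The packet type block is meant to be compiled into a per-driver O(1) lookup
table indexed by the HW ptype number, with the XDP RSS hash type precomputed
once at init rather than per packet. A sketch of one entry; the table size
and the MY_PTYPE_IPV4_TCP index are hypothetical, and
libeth_rx_pt_gen_hash_type() is assumed to derive ::hash_type from the other
fields, as its placement next to the hash_type bitfield suggests:

#define MY_PTYPE_NUM		256	/* hypothetical HW ptype space */
#define MY_PTYPE_IPV4_TCP	26	/* hypothetical HW ptype number */

static struct libeth_rx_pt my_pt_lut[MY_PTYPE_NUM];

static void my_pt_lut_init(void)
{
	struct libeth_rx_pt pt = {
		.outer_ip	= LIBETH_RX_PT_OUTER_IPV4,
		.outer_frag	= LIBETH_RX_PT_NOT_FRAG,
		.tunnel_type	= LIBETH_RX_PT_TUNNEL_IP_NONE,
		.inner_prot	= LIBETH_RX_PT_INNER_TCP,
		.payload_layer	= LIBETH_RX_PT_PAYLOAD_L4,
	};

	/* Fill in pt.hash_type once, not on every packet */
	libeth_rx_pt_gen_hash_type(&pt);

	my_pt_lut[MY_PTYPE_IPV4_TCP] = pt;
}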
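
With the table in place, hotpath metadata handling reduces to the helpers at
the end of the header; both already fold in the NETIF_F_* feature checks. A
sketch using my_pt_lut from the previous example; setting CHECKSUM_UNNECESSARY
here is a common driver policy, not something the header prescribes:

static void my_rx_process_meta(const struct net_device *dev,
			       struct sk_buff *skb, u32 rss_hash, u32 ptype)
{
	struct libeth_rx_pt pt = my_pt_lut[ptype];

	if (libeth_rx_pt_has_hash(dev, pt))
		libeth_rx_pt_set_hash(skb, rss_hash, pt);

	if (libeth_rx_pt_has_checksum(dev, pt))
		/* HW validated the L4 checksum for this ptype */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
}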