diff options
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r-- | net/core/skbuff.c | 188 |
1 files changed, 149 insertions, 39 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 570269c477..4096e679f6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -69,6 +69,7 @@ #include <net/sock.h> #include <net/checksum.h> #include <net/gso.h> +#include <net/hotdata.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/mpls.h> @@ -88,15 +89,10 @@ #include "dev.h" #include "sock_destructor.h" -struct kmem_cache *skbuff_cache __ro_after_init; -static struct kmem_cache *skbuff_fclone_cache __ro_after_init; #ifdef CONFIG_SKB_EXTENSIONS static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif - -static struct kmem_cache *skb_small_head_cache __ro_after_init; - #define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER) /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. @@ -115,6 +111,24 @@ static struct kmem_cache *skb_small_head_cache __ro_after_init; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); +/* kcm_write_msgs() relies on casting paged frags to bio_vec to use + * iov_iter_bvec(). These static asserts ensure the cast is valid is long as the + * netmem is a page. + */ +static_assert(offsetof(struct bio_vec, bv_page) == + offsetof(skb_frag_t, netmem)); +static_assert(sizeof_field(struct bio_vec, bv_page) == + sizeof_field(skb_frag_t, netmem)); + +static_assert(offsetof(struct bio_vec, bv_len) == offsetof(skb_frag_t, len)); +static_assert(sizeof_field(struct bio_vec, bv_len) == + sizeof_field(skb_frag_t, len)); + +static_assert(offsetof(struct bio_vec, bv_offset) == + offsetof(skb_frag_t, offset)); +static_assert(sizeof_field(struct bio_vec, bv_offset) == + sizeof_field(skb_frag_t, offset)); + #undef FN #define FN(reason) [SKB_DROP_REASON_##reason] = #reason, static const char * const drop_reasons[] = { @@ -297,7 +311,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) fragsz = SKB_DATA_ALIGN(fragsz); - return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, + align_mask); } EXPORT_SYMBOL(__napi_alloc_frag_align); @@ -309,13 +324,15 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) if (in_hardirq() || irqs_disabled()) { struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); - data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); + data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, + align_mask); } else { struct napi_alloc_cache *nc; local_bh_disable(); nc = this_cpu_ptr(&napi_alloc_cache); - data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, + align_mask); local_bh_enable(); } return data; @@ -328,7 +345,7 @@ static struct sk_buff *napi_skb_cache_get(void) struct sk_buff *skb; if (unlikely(!nc->skb_count)) { - nc->skb_count = kmem_cache_alloc_bulk(skbuff_cache, + nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, GFP_ATOMIC, NAPI_SKB_CACHE_BULK, nc->skb_cache); @@ -337,7 +354,7 @@ static struct sk_buff *napi_skb_cache_get(void) } skb = nc->skb_cache[--nc->skb_count]; - kasan_mempool_unpoison_object(skb, kmem_cache_size(skbuff_cache)); + kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache)); return skb; } @@ -395,7 +412,7 @@ struct sk_buff *slab_build_skb(void *data) struct sk_buff *skb; unsigned int size; - skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC); + skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC); if (unlikely(!skb)) return NULL; @@ -446,7 +463,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; - skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC); + skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC); if (unlikely(!skb)) return NULL; @@ -557,7 +574,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, obj_size = SKB_HEAD_ALIGN(*size); if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE && !(flags & KMALLOC_NOT_NORMAL_BITS)) { - obj = kmem_cache_alloc_node(skb_small_head_cache, + obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); *size = SKB_SMALL_HEAD_CACHE_SIZE; @@ -565,7 +582,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, goto out; /* Try again but now we are using pfmemalloc reserves */ ret_pfmemalloc = true; - obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node); + obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node); goto out; } @@ -628,7 +645,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, u8 *data; cache = (flags & SKB_ALLOC_FCLONE) - ? skbuff_fclone_cache : skbuff_cache; + ? net_hotdata.skbuff_fclone_cache : net_hotdata.skbuff_cache; if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) gfp_mask |= __GFP_MEMALLOC; @@ -845,17 +862,17 @@ skb_fail: } EXPORT_SYMBOL(__napi_alloc_skb); -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size, unsigned int truesize) +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size, unsigned int truesize) { DEBUG_NET_WARN_ON_ONCE(size > truesize); - skb_fill_page_desc(skb, i, page, off, size); + skb_fill_netmem_desc(skb, i, netmem, off, size); skb->len += size; skb->data_len += size; skb->truesize += truesize; } -EXPORT_SYMBOL(skb_add_rx_frag); +EXPORT_SYMBOL(skb_add_rx_frag_netmem); void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize) @@ -895,6 +912,98 @@ static bool is_pp_page(struct page *page) return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; } +int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, + unsigned int headroom) +{ +#if IS_ENABLED(CONFIG_PAGE_POOL) + u32 size, truesize, len, max_head_size, off; + struct sk_buff *skb = *pskb, *nskb; + int err, i, head_off; + void *data; + + /* XDP does not support fraglist so we need to linearize + * the skb. + */ + if (skb_has_frag_list(skb)) + return -EOPNOTSUPP; + + max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - headroom); + if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE) + return -ENOMEM; + + size = min_t(u32, skb->len, max_head_size); + truesize = SKB_HEAD_ALIGN(size) + headroom; + data = page_pool_dev_alloc_va(pool, &truesize); + if (!data) + return -ENOMEM; + + nskb = napi_build_skb(data, truesize); + if (!nskb) { + page_pool_free_va(pool, data, true); + return -ENOMEM; + } + + skb_reserve(nskb, headroom); + skb_copy_header(nskb, skb); + skb_mark_for_recycle(nskb); + + err = skb_copy_bits(skb, 0, nskb->data, size); + if (err) { + consume_skb(nskb); + return err; + } + skb_put(nskb, size); + + head_off = skb_headroom(nskb) - skb_headroom(skb); + skb_headers_offset_update(nskb, head_off); + + off = size; + len = skb->len - off; + for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) { + struct page *page; + u32 page_off; + + size = min_t(u32, len, PAGE_SIZE); + truesize = size; + + page = page_pool_dev_alloc(pool, &page_off, &truesize); + if (!page) { + consume_skb(nskb); + return -ENOMEM; + } + + skb_add_rx_frag(nskb, i, page, page_off, size, truesize); + err = skb_copy_bits(skb, off, page_address(page) + page_off, + size); + if (err) { + consume_skb(nskb); + return err; + } + + len -= size; + off += size; + } + + consume_skb(skb); + *pskb = nskb; + + return 0; +#else + return -EOPNOTSUPP; +#endif +} +EXPORT_SYMBOL(skb_pp_cow_data); + +int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, + struct bpf_prog *prog) +{ + if (!prog->aux->xdp_has_frags) + return -EINVAL; + + return skb_pp_cow_data(pool, pskb, XDP_PACKET_HEADROOM); +} +EXPORT_SYMBOL(skb_cow_data_for_xdp); + #if IS_ENABLED(CONFIG_PAGE_POOL) bool napi_pp_put_page(struct page *page, bool napi_safe) { @@ -923,9 +1032,10 @@ bool napi_pp_put_page(struct page *page, bool napi_safe) */ if (napi_safe || in_softirq()) { const struct napi_struct *napi = READ_ONCE(pp->p.napi); + unsigned int cpuid = smp_processor_id(); - allow_direct = napi && - READ_ONCE(napi->list_owner) == smp_processor_id(); + allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid; + allow_direct |= READ_ONCE(pp->cpuid) == cpuid; } /* Driver set this to memory recycling info. Reset it on recycle. @@ -981,7 +1091,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb) static void skb_kfree_head(void *head, unsigned int end_offset) { if (end_offset == SKB_SMALL_HEAD_HEADROOM) - kmem_cache_free(skb_small_head_cache, head); + kmem_cache_free(net_hotdata.skb_small_head_cache, head); else kfree(head); } @@ -1005,9 +1115,7 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason, struct skb_shared_info *shinfo = skb_shinfo(skb); int i; - if (skb->cloned && - atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, - &shinfo->dataref)) + if (!skb_data_unref(skb, shinfo)) goto exit; if (skb_zcopy(skb)) { @@ -1048,7 +1156,7 @@ static void kfree_skbmem(struct sk_buff *skb) switch (skb->fclone) { case SKB_FCLONE_UNAVAILABLE: - kmem_cache_free(skbuff_cache, skb); + kmem_cache_free(net_hotdata.skbuff_cache, skb); return; case SKB_FCLONE_ORIG: @@ -1069,7 +1177,7 @@ static void kfree_skbmem(struct sk_buff *skb) if (!refcount_dec_and_test(&fclones->fclone_ref)) return; fastpath: - kmem_cache_free(skbuff_fclone_cache, fclones); + kmem_cache_free(net_hotdata.skbuff_fclone_cache, fclones); } void skb_release_head_state(struct sk_buff *skb) @@ -1166,7 +1274,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb, sa->skb_array[sa->skb_count++] = skb; if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) { - kmem_cache_free_bulk(skbuff_cache, KFREE_SKB_BULK_SIZE, + kmem_cache_free_bulk(net_hotdata.skbuff_cache, KFREE_SKB_BULK_SIZE, sa->skb_array); sa->skb_count = 0; } @@ -1191,7 +1299,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason) } if (sa.skb_count) - kmem_cache_free_bulk(skbuff_cache, sa.skb_count, sa.skb_array); + kmem_cache_free_bulk(net_hotdata.skbuff_cache, sa.skb_count, sa.skb_array); } EXPORT_SYMBOL(kfree_skb_list_reason); @@ -1353,9 +1461,9 @@ static void napi_skb_cache_put(struct sk_buff *skb) if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++) kasan_mempool_unpoison_object(nc->skb_cache[i], - kmem_cache_size(skbuff_cache)); + kmem_cache_size(net_hotdata.skbuff_cache)); - kmem_cache_free_bulk(skbuff_cache, NAPI_SKB_CACHE_HALF, + kmem_cache_free_bulk(net_hotdata.skbuff_cache, NAPI_SKB_CACHE_HALF, nc->skb_cache + NAPI_SKB_CACHE_HALF); nc->skb_count = NAPI_SKB_CACHE_HALF; } @@ -1906,10 +2014,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) /* skb frags point to kernel buffers */ for (i = 0; i < new_frags - 1; i++) { - __skb_fill_page_desc(skb, i, head, 0, psize); + __skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize); head = (struct page *)page_private(head); } - __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); + __skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0, + d_off); skb_shinfo(skb)->nr_frags = new_frags; release: @@ -1951,7 +2060,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; - n = kmem_cache_alloc(skbuff_cache, gfp_mask); + n = kmem_cache_alloc(net_hotdata.skbuff_cache, gfp_mask); if (!n) return NULL; @@ -3658,7 +3767,8 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) if (plen) { page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); - __skb_fill_page_desc(to, 0, page, offset, plen); + __skb_fill_netmem_desc(to, 0, page_to_netmem(page), + offset, plen); get_page(page); j = 1; len -= plen; @@ -4900,7 +5010,7 @@ static void skb_extensions_init(void) {} void __init skb_init(void) { - skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache", + net_hotdata.skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache", sizeof(struct sk_buff), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC| @@ -4908,7 +5018,7 @@ void __init skb_init(void) offsetof(struct sk_buff, cb), sizeof_field(struct sk_buff, cb), NULL); - skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", + net_hotdata.skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", sizeof(struct sk_buff_fclones), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, @@ -4917,7 +5027,7 @@ void __init skb_init(void) * struct skb_shared_info is located at the end of skb->head, * and should not be copied to/from user. */ - skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head", + net_hotdata.skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head", SKB_SMALL_HEAD_CACHE_SIZE, 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, @@ -5790,7 +5900,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) { if (head_stolen) { skb_release_head_state(skb); - kmem_cache_free(skbuff_cache, skb); + kmem_cache_free(net_hotdata.skbuff_cache, skb); } else { __kfree_skb(skb); } |