From dc50eab76b709d68175a358d6e23a5a3890764d3 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 19:39:57 +0200 Subject: Merging upstream version 6.7.7. Signed-off-by: Daniel Baumann --- net/netfilter/nft_set_pipapo.c | 204 +++++++++++++++++++++-------------------- 1 file changed, 105 insertions(+), 99 deletions(-) (limited to 'net/netfilter/nft_set_pipapo.c') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 3ff31043f..3089c4ca8 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -342,9 +342,6 @@ #include "nft_set_pipapo_avx2.h" #include "nft_set_pipapo.h" -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index); - /** * pipapo_refill() - For each set bit, set bits from selected mapping table item * @map: Bitmap to be scanned for set bits @@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_scratch *scratch; unsigned long *res_map, *fill_map; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; @@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, local_bh_disable(); - map_index = raw_cpu_read(nft_pipapo_scratch_index); - m = rcu_dereference(priv->match); if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) goto out; - res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0); - fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max); + scratch = *raw_cpu_ptr(m->scratch); + + map_index = scratch->map_index; + + res_map = scratch->map + (map_index ? m->bsize_max : 0); + fill_map = scratch->map + (map_index ? 0 : m->bsize_max); memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); @@ -460,7 +460,7 @@ next_match: b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt, last); if (b < 0) { - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return false; @@ -477,7 +477,7 @@ next_match: * current inactive bitmap is clean and can be reused as * *next* bitmap (not initial) for the next packet. */ - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return true; @@ -504,6 +504,7 @@ out: * @set: nftables API set representation * @data: Key data to be matched against existing elements * @genmask: If set, check that element is active in given genmask + * @tstamp: timestamp to check for expired elements * * This is essentially the same as the lookup function, except that it matches * key data against the uncommitted copy and doesn't use preallocated maps for @@ -513,7 +514,8 @@ out: */ static struct nft_pipapo_elem *pipapo_get(const struct net *net, const struct nft_set *set, - const u8 *data, u8 genmask) + const u8 *data, u8 genmask, + u64 tstamp) { struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); struct nft_pipapo *priv = nft_set_priv(set); @@ -566,7 +568,7 @@ next_match: goto out; if (last) { - if (nft_set_elem_expired(&f->mt[b].e->ext)) + if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp)) goto next_match; if ((genmask && !nft_set_elem_active(&f->mt[b].e->ext, genmask))) @@ -599,11 +601,18 @@ out: * @elem: nftables API element representation containing key data * @flags: Unused */ -static void *nft_pipapo_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_pipapo_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { - return pipapo_get(net, set, (const u8 *)elem->key.val.data, - nft_genmask_cur(net)); + struct nft_pipapo_elem *e; + + e = pipapo_get(net, set, (const u8 *)elem->key.val.data, + nft_genmask_cur(net), get_jiffies_64()); + if (IS_ERR(e)) + return ERR_CAST(e); + + return &e->priv; } /** @@ -1101,6 +1110,25 @@ static void pipapo_map(struct nft_pipapo_match *m, f->mt[map[i].to + j].e = e; } +/** + * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address + * @m: Matching data + * @cpu: CPU number + */ +static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu) +{ + struct nft_pipapo_scratch *s; + void *mem; + + s = *per_cpu_ptr(m->scratch, cpu); + if (!s) + return; + + mem = s; + mem -= s->align_off; + kfree(mem); +} + /** * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results * @clone: Copy of matching data with pending insertions and deletions @@ -1114,12 +1142,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, int i; for_each_possible_cpu(i) { - unsigned long *scratch; + struct nft_pipapo_scratch *scratch; #ifdef NFT_PIPAPO_ALIGN - unsigned long *scratch_aligned; + void *scratch_aligned; + u32 align_off; #endif - - scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 + + scratch = kzalloc_node(struct_size(scratch, map, + bsize_max * 2) + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL, cpu_to_node(i)); if (!scratch) { @@ -1133,14 +1162,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, return -ENOMEM; } - kfree(*per_cpu_ptr(clone->scratch, i)); - - *per_cpu_ptr(clone->scratch, i) = scratch; + pipapo_free_scratch(clone, i); #ifdef NFT_PIPAPO_ALIGN - scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch); - *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; + /* Align &scratch->map (not the struct itself): the extra + * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() + * above guarantee we can waste up to those bytes in order + * to align the map field regardless of its offset within + * the struct. + */ + BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM); + + scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); + scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); + align_off = scratch_aligned - (void *)scratch; + + scratch = scratch_aligned; + scratch->align_off = align_off; #endif + *per_cpu_ptr(clone->scratch, i) = scratch; } return 0; @@ -1151,21 +1191,22 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, * @net: Network namespace * @set: nftables API set representation * @elem: nftables API element representation containing key data - * @ext2: Filled with pointer to &struct nft_set_ext in inserted element + * @elem_priv: Filled with pointer to &struct nft_set_ext in inserted element * * Return: 0 on success, error pointer on failure. */ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext2) + struct nft_elem_priv **elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const u8 *start = (const u8 *)elem->key.val.data, *end; - struct nft_pipapo_elem *e = elem->priv, *dup; struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; u8 genmask = nft_genmask_next(net); + struct nft_pipapo_elem *e, *dup; + u64 tstamp = nft_net_tstamp(net); struct nft_pipapo_field *f; const u8 *start_p, *end_p; int i, bsize_max, err = 0; @@ -1175,7 +1216,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else end = start; - dup = pipapo_get(net, set, start, genmask); + dup = pipapo_get(net, set, start, genmask, tstamp); if (!IS_ERR(dup)) { /* Check if we already have the same exact entry */ const struct nft_data *dup_key, *dup_end; @@ -1188,7 +1229,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) && !memcmp(end, dup_end->data, sizeof(*dup_end->data))) { - *ext2 = &dup->ext; + *elem_priv = &dup->priv; return -EEXIST; } @@ -1197,13 +1238,13 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (PTR_ERR(dup) == -ENOENT) { /* Look for partially overlapping entries */ - dup = pipapo_get(net, set, end, nft_genmask_next(net)); + dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp); } if (PTR_ERR(dup) != -ENOENT) { if (IS_ERR(dup)) return PTR_ERR(dup); - *ext2 = &dup->ext; + *elem_priv = &dup->priv; return -ENOTEMPTY; } @@ -1263,7 +1304,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, put_cpu_ptr(m->scratch); } - *ext2 = &e->ext; + e = nft_elem_priv_cast(elem->priv); + *elem_priv = &e->priv; pipapo_map(m, rulemap, e); @@ -1293,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch) goto out_scratch; -#ifdef NFT_PIPAPO_ALIGN - new->scratch_aligned = alloc_percpu(*new->scratch_aligned); - if (!new->scratch_aligned) - goto out_scratch; -#endif for_each_possible_cpu(i) *per_cpu_ptr(new->scratch, i) = NULL; @@ -1349,10 +1386,7 @@ out_lt: } out_scratch_realloc: for_each_possible_cpu(i) - kfree(*per_cpu_ptr(new->scratch, i)); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(new->scratch_aligned); -#endif + pipapo_free_scratch(new, i); out_scratch: free_percpu(new->scratch); kfree(new); @@ -1540,23 +1574,19 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, struct nft_pipapo_elem *e) { - struct nft_set_elem elem = { - .priv = e, - }; - - nft_setelem_data_deactivate(net, set, &elem); + nft_setelem_data_deactivate(net, set, &e->priv); } /** * pipapo_gc() - Drop expired entries from set, destroy start and end elements - * @_set: nftables API set representation + * @set: nftables API set representation * @m: Matching data */ -static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) +static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) { - struct nft_set *set = (struct nft_set *) _set; struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); + u64 tstamp = nft_net_tstamp(net); int rules_f0, first_rule = 0; struct nft_pipapo_elem *e; struct nft_trans_gc *gc; @@ -1591,7 +1621,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) /* synchronous gc never fails, there is no need to set on * NFT_SET_ELEM_DEAD_BIT. */ - if (nft_set_elem_expired(&e->ext)) { + if (__nft_set_elem_expired(&e->ext, tstamp)) { priv->dirty = true; gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); @@ -1637,13 +1667,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m) int i; for_each_possible_cpu(i) - kfree(*per_cpu_ptr(m->scratch, i)); + pipapo_free_scratch(m, i); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); - pipapo_free_fields(m); kfree(m); @@ -1672,7 +1698,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * We also need to create a new working copy for subsequent insertions and * deletions. */ -static void nft_pipapo_commit(const struct nft_set *set) +static void nft_pipapo_commit(struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *new_clone, *old; @@ -1732,7 +1758,7 @@ static void nft_pipapo_abort(const struct nft_set *set) * nft_pipapo_activate() - Mark element reference as active given key, commit * @net: Network namespace * @set: nftables API set representation - * @elem: nftables API element representation containing key data + * @elem_priv: nftables API element representation containing key data * * On insertion, elements are added to a copy of the matching data currently * in use for lookups, and not directly inserted into current lookup data. Both @@ -1741,9 +1767,9 @@ static void nft_pipapo_abort(const struct nft_set *set) */ static void nft_pipapo_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_pipapo_elem *e = elem->priv; + struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &e->ext); } @@ -1766,7 +1792,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, { struct nft_pipapo_elem *e; - e = pipapo_get(net, set, data, nft_genmask_next(net)); + e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net)); if (IS_ERR(e)) return NULL; @@ -1783,9 +1809,9 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, * * Return: deactivated element if found, NULL otherwise. */ -static void *nft_pipapo_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_pipapo_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -1796,7 +1822,7 @@ static void *nft_pipapo_deactivate(const struct net *net, * nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive * @net: Network namespace * @set: nftables API set representation - * @elem: nftables API element representation containing key data + * @elem_priv: nftables API element representation containing key data * * This is functionally the same as nft_pipapo_deactivate(), with a slightly * different interface, and it's also called once for each element in a set @@ -1810,13 +1836,12 @@ static void *nft_pipapo_deactivate(const struct net *net, * * Return: true if element was found and deactivated. */ -static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set, - void *elem) +static void nft_pipapo_flush(const struct net *net, const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_pipapo_elem *e = elem; + struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv); - return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext), - &e->ext); + nft_set_elem_change_active(net, set, &e->ext); } /** @@ -1939,7 +1964,7 @@ static bool pipapo_match_field(struct nft_pipapo_field *f, * nft_pipapo_remove() - Remove element given key, commit * @net: Network namespace * @set: nftables API set representation - * @elem: nftables API element representation containing key data + * @elem_priv: nftables API element representation containing key data * * Similarly to nft_pipapo_activate(), this is used as commit operation by the * API, but it's called once per element in the pending transaction, so we can't @@ -1947,14 +1972,15 @@ static bool pipapo_match_field(struct nft_pipapo_field *f, * the matched element here, if any, and commit the updated matching data. */ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; - struct nft_pipapo_elem *e = elem->priv; int rules_f0, first_rule = 0; + struct nft_pipapo_elem *e; const u8 *data; + e = nft_elem_priv_cast(elem_priv); data = (const u8 *)nft_set_ext_key(&e->ext); while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { @@ -2031,7 +2057,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, for (r = 0; r < f->rules; r++) { struct nft_pipapo_elem *e; - struct nft_set_elem elem; if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e) continue; @@ -2044,9 +2069,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, if (!nft_set_elem_active(&e->ext, iter->genmask)) goto cont; - elem.priv = e; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &e->priv); if (iter->err < 0) goto out; @@ -2118,6 +2141,8 @@ static int nft_pipapo_init(const struct nft_set *set, struct nft_pipapo_field *f; int err, i, field_count; + BUILD_BUG_ON(offsetof(struct nft_pipapo_elem, priv) != 0); + field_count = desc->field_count ? : 1; if (field_count > NFT_PIPAPO_MAX_FIELDS) @@ -2130,7 +2155,7 @@ static int nft_pipapo_init(const struct nft_set *set, m->field_count = field_count; m->bsize_max = 0; - m->scratch = alloc_percpu(unsigned long *); + m->scratch = alloc_percpu(struct nft_pipapo_scratch *); if (!m->scratch) { err = -ENOMEM; goto out_scratch; @@ -2138,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set, for_each_possible_cpu(i) *per_cpu_ptr(m->scratch, i) = NULL; -#ifdef NFT_PIPAPO_ALIGN - m->scratch_aligned = alloc_percpu(unsigned long *); - if (!m->scratch_aligned) { - err = -ENOMEM; - goto out_free; - } - for_each_possible_cpu(i) - *per_cpu_ptr(m->scratch_aligned, i) = NULL; -#endif - rcu_head_init(&m->rcu); nft_pipapo_for_each_field(f, i, m) { @@ -2178,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set, return 0; out_free: -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); out_scratch: kfree(m); @@ -2212,7 +2224,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx, e = f->mt[r].e; - nf_tables_set_elem_destroy(ctx, set, e); + nf_tables_set_elem_destroy(ctx, set, &e->priv); } } @@ -2234,11 +2246,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(m->scratch, cpu)); + pipapo_free_scratch(m, cpu); free_percpu(m->scratch); pipapo_free_fields(m); kfree(m); @@ -2251,11 +2260,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (priv->dirty) nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(priv->clone->scratch_aligned); -#endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(priv->clone->scratch, cpu)); + pipapo_free_scratch(priv->clone, cpu); free_percpu(priv->clone->scratch); pipapo_free_fields(priv->clone); -- cgit v1.2.3