summaryrefslogtreecommitdiffstats
path: root/net/netfilter/nft_set_pipapo.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 17:40:19 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 17:40:19 +0000
commit9f0fc191371843c4fc000a226b0a26b6c059aacd (patch)
tree35f8be3ef04506ac891ad001e8c41e535ae8d01d /net/netfilter/nft_set_pipapo.c
parentReleasing progress-linux version 6.6.15-2~progress7.99u1. (diff)
downloadlinux-9f0fc191371843c4fc000a226b0a26b6c059aacd.tar.xz
linux-9f0fc191371843c4fc000a226b0a26b6c059aacd.zip
Merging upstream version 6.7.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'net/netfilter/nft_set_pipapo.c')
-rw-r--r--net/netfilter/nft_set_pipapo.c204
1 files changed, 105 insertions, 99 deletions
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 3ff31043f..3089c4ca8 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -342,9 +342,6 @@
#include "nft_set_pipapo_avx2.h"
#include "nft_set_pipapo.h"
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
-
/**
* pipapo_refill() - For each set bit, set bits from selected mapping table item
* @map: Bitmap to be scanned for set bits
@@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
@@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
local_bh_disable();
- map_index = raw_cpu_read(nft_pipapo_scratch_index);
-
m = rcu_dereference(priv->match);
if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
goto out;
- res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
- fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
+ scratch = *raw_cpu_ptr(m->scratch);
+
+ map_index = scratch->map_index;
+
+ res_map = scratch->map + (map_index ? m->bsize_max : 0);
+ fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
@@ -460,7 +460,7 @@ next_match:
b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
last);
if (b < 0) {
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return false;
@@ -477,7 +477,7 @@ next_match:
* current inactive bitmap is clean and can be reused as
* *next* bitmap (not initial) for the next packet.
*/
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return true;
@@ -504,6 +504,7 @@ out:
* @set: nftables API set representation
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
+ * @tstamp: timestamp to check for expired elements
*
* This is essentially the same as the lookup function, except that it matches
* key data against the uncommitted copy and doesn't use preallocated maps for
@@ -513,7 +514,8 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
- const u8 *data, u8 genmask)
+ const u8 *data, u8 genmask,
+ u64 tstamp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
struct nft_pipapo *priv = nft_set_priv(set);
@@ -566,7 +568,7 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext))
+ if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
@@ -599,11 +601,18 @@ out:
* @elem: nftables API element representation containing key data
* @flags: Unused
*/
-static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_pipapo_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
- return pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ struct nft_pipapo_elem *e;
+
+ e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
+ nft_genmask_cur(net), get_jiffies_64());
+ if (IS_ERR(e))
+ return ERR_CAST(e);
+
+ return &e->priv;
}
/**
@@ -1102,6 +1111,25 @@ static void pipapo_map(struct nft_pipapo_match *m,
}
/**
+ * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * @m: Matching data
+ * @cpu: CPU number
+ */
+static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
+{
+ struct nft_pipapo_scratch *s;
+ void *mem;
+
+ s = *per_cpu_ptr(m->scratch, cpu);
+ if (!s)
+ return;
+
+ mem = s;
+ mem -= s->align_off;
+ kfree(mem);
+}
+
+/**
* pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
* @clone: Copy of matching data with pending insertions and deletions
* @bsize_max: Maximum bucket size, scratch maps cover two buckets
@@ -1114,12 +1142,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
int i;
for_each_possible_cpu(i) {
- unsigned long *scratch;
+ struct nft_pipapo_scratch *scratch;
#ifdef NFT_PIPAPO_ALIGN
- unsigned long *scratch_aligned;
+ void *scratch_aligned;
+ u32 align_off;
#endif
-
- scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
+ scratch = kzalloc_node(struct_size(scratch, map,
+ bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL, cpu_to_node(i));
if (!scratch) {
@@ -1133,14 +1162,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return -ENOMEM;
}
- kfree(*per_cpu_ptr(clone->scratch, i));
-
- *per_cpu_ptr(clone->scratch, i) = scratch;
+ pipapo_free_scratch(clone, i);
#ifdef NFT_PIPAPO_ALIGN
- scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
- *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+ /* Align &scratch->map (not the struct itself): the extra
+ * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
+ * above guarantee we can waste up to those bytes in order
+ * to align the map field regardless of its offset within
+ * the struct.
+ */
+ BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
+
+ scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
+ scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
+ align_off = scratch_aligned - (void *)scratch;
+
+ scratch = scratch_aligned;
+ scratch->align_off = align_off;
#endif
+ *per_cpu_ptr(clone->scratch, i) = scratch;
}
return 0;
@@ -1151,21 +1191,22 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
* @net: Network namespace
* @set: nftables API set representation
* @elem: nftables API element representation containing key data
- * @ext2: Filled with pointer to &struct nft_set_ext in inserted element
+ * @elem_priv: Filled with pointer to &struct nft_set_ext in inserted element
*
* Return: 0 on success, error pointer on failure.
*/
static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext2)
+ struct nft_elem_priv **elem_priv)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *start = (const u8 *)elem->key.val.data, *end;
- struct nft_pipapo_elem *e = elem->priv, *dup;
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
+ struct nft_pipapo_elem *e, *dup;
+ u64 tstamp = nft_net_tstamp(net);
struct nft_pipapo_field *f;
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
@@ -1175,7 +1216,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
- dup = pipapo_get(net, set, start, genmask);
+ dup = pipapo_get(net, set, start, genmask, tstamp);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1188,7 +1229,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
!memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
- *ext2 = &dup->ext;
+ *elem_priv = &dup->priv;
return -EEXIST;
}
@@ -1197,13 +1238,13 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net));
+ dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp);
}
if (PTR_ERR(dup) != -ENOENT) {
if (IS_ERR(dup))
return PTR_ERR(dup);
- *ext2 = &dup->ext;
+ *elem_priv = &dup->priv;
return -ENOTEMPTY;
}
@@ -1263,7 +1304,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
put_cpu_ptr(m->scratch);
}
- *ext2 = &e->ext;
+ e = nft_elem_priv_cast(elem->priv);
+ *elem_priv = &e->priv;
pipapo_map(m, rulemap, e);
@@ -1293,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new->scratch)
goto out_scratch;
-#ifdef NFT_PIPAPO_ALIGN
- new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
- if (!new->scratch_aligned)
- goto out_scratch;
-#endif
for_each_possible_cpu(i)
*per_cpu_ptr(new->scratch, i) = NULL;
@@ -1349,10 +1386,7 @@ out_lt:
}
out_scratch_realloc:
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(new->scratch, i));
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(new->scratch_aligned);
-#endif
+ pipapo_free_scratch(new, i);
out_scratch:
free_percpu(new->scratch);
kfree(new);
@@ -1540,23 +1574,19 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
struct nft_pipapo_elem *e)
{
- struct nft_set_elem elem = {
- .priv = e,
- };
-
- nft_setelem_data_deactivate(net, set, &elem);
+ nft_setelem_data_deactivate(net, set, &e->priv);
}
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
- * @_set: nftables API set representation
+ * @set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
+static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
- struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
struct nft_trans_gc *gc;
@@ -1591,7 +1621,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
/* synchronous gc never fails, there is no need to set on
* NFT_SET_ELEM_DEAD_BIT.
*/
- if (nft_set_elem_expired(&e->ext)) {
+ if (__nft_set_elem_expired(&e->ext, tstamp)) {
priv->dirty = true;
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
@@ -1637,13 +1667,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m)
int i;
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(m->scratch, i));
+ pipapo_free_scratch(m, i);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
-
pipapo_free_fields(m);
kfree(m);
@@ -1672,7 +1698,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
* We also need to create a new working copy for subsequent insertions and
* deletions.
*/
-static void nft_pipapo_commit(const struct nft_set *set)
+static void nft_pipapo_commit(struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *new_clone, *old;
@@ -1732,7 +1758,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
* nft_pipapo_activate() - Mark element reference as active given key, commit
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @elem_priv: nftables API element representation containing key data
*
* On insertion, elements are added to a copy of the matching data currently
* in use for lookups, and not directly inserted into current lookup data. Both
@@ -1741,9 +1767,9 @@ static void nft_pipapo_abort(const struct nft_set *set)
*/
static void nft_pipapo_activate(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_pipapo_elem *e = elem->priv;
+ struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &e->ext);
}
@@ -1766,7 +1792,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
{
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net));
+ e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net));
if (IS_ERR(e))
return NULL;
@@ -1783,9 +1809,9 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
*
* Return: deactivated element if found, NULL otherwise.
*/
-static void *nft_pipapo_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
@@ -1796,7 +1822,7 @@ static void *nft_pipapo_deactivate(const struct net *net,
* nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @elem_priv: nftables API element representation containing key data
*
* This is functionally the same as nft_pipapo_deactivate(), with a slightly
* different interface, and it's also called once for each element in a set
@@ -1810,13 +1836,12 @@ static void *nft_pipapo_deactivate(const struct net *net,
*
* Return: true if element was found and deactivated.
*/
-static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set,
- void *elem)
+static void nft_pipapo_flush(const struct net *net, const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
- struct nft_pipapo_elem *e = elem;
+ struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
- return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext),
- &e->ext);
+ nft_set_elem_change_active(net, set, &e->ext);
}
/**
@@ -1939,7 +1964,7 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
* nft_pipapo_remove() - Remove element given key, commit
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @elem_priv: nftables API element representation containing key data
*
* Similarly to nft_pipapo_activate(), this is used as commit operation by the
* API, but it's called once per element in the pending transaction, so we can't
@@ -1947,14 +1972,15 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
* the matched element here, if any, and commit the updated matching data.
*/
static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
- struct nft_pipapo_elem *e = elem->priv;
int rules_f0, first_rule = 0;
+ struct nft_pipapo_elem *e;
const u8 *data;
+ e = nft_elem_priv_cast(elem_priv);
data = (const u8 *)nft_set_ext_key(&e->ext);
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
@@ -2031,7 +2057,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
for (r = 0; r < f->rules; r++) {
struct nft_pipapo_elem *e;
- struct nft_set_elem elem;
if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
continue;
@@ -2044,9 +2069,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (!nft_set_elem_active(&e->ext, iter->genmask))
goto cont;
- elem.priv = e;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
goto out;
@@ -2118,6 +2141,8 @@ static int nft_pipapo_init(const struct nft_set *set,
struct nft_pipapo_field *f;
int err, i, field_count;
+ BUILD_BUG_ON(offsetof(struct nft_pipapo_elem, priv) != 0);
+
field_count = desc->field_count ? : 1;
if (field_count > NFT_PIPAPO_MAX_FIELDS)
@@ -2130,7 +2155,7 @@ static int nft_pipapo_init(const struct nft_set *set,
m->field_count = field_count;
m->bsize_max = 0;
- m->scratch = alloc_percpu(unsigned long *);
+ m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
if (!m->scratch) {
err = -ENOMEM;
goto out_scratch;
@@ -2138,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set,
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch, i) = NULL;
-#ifdef NFT_PIPAPO_ALIGN
- m->scratch_aligned = alloc_percpu(unsigned long *);
- if (!m->scratch_aligned) {
- err = -ENOMEM;
- goto out_free;
- }
- for_each_possible_cpu(i)
- *per_cpu_ptr(m->scratch_aligned, i) = NULL;
-#endif
-
rcu_head_init(&m->rcu);
nft_pipapo_for_each_field(f, i, m) {
@@ -2178,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set,
return 0;
out_free:
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2212,7 +2224,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
e = f->mt[r].e;
- nf_tables_set_elem_destroy(ctx, set, e);
+ nf_tables_set_elem_destroy(ctx, set, &e->priv);
}
}
@@ -2234,11 +2246,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(m->scratch, cpu));
+ pipapo_free_scratch(m, cpu);
free_percpu(m->scratch);
pipapo_free_fields(m);
kfree(m);
@@ -2251,11 +2260,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (priv->dirty)
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(priv->clone->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
+ pipapo_free_scratch(priv->clone, cpu);
free_percpu(priv->clone->scratch);
pipapo_free_fields(priv->clone);