summary | refs | log | tree | commit | diff | stats
path: root/net/netfilter
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 17:39:57 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 17:39:57 +0000
commit: dc50eab76b709d68175a358d6e23a5a3890764d3 (patch)
tree: c754d0390db060af0213ff994f0ac310e4cfd6e9 /net/netfilter
parent: Adding debian version 6.6.15-2. (diff)
download: linux-dc50eab76b709d68175a358d6e23a5a3890764d3.tar.xz
download: linux-dc50eab76b709d68175a358d6e23a5a3890764d3.zip
Merging upstream version 6.7.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/core.c 6
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_gen.h 14
-rw-r--r-- net/netfilter/ipset/ip_set_core.c 39
-rw-r--r-- net/netfilter/ipset/ip_set_hash_gen.h 19
-rw-r--r-- net/netfilter/ipset/ip_set_list_set.c 13
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_dh.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_fo.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_ftp.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_lc.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_nq.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_ovf.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe_sip.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_rr.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_sed.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_sh.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c 16
-rw-r--r-- net/netfilter/ipvs/ip_vs_twos.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_wlc.c 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_wrr.c 1
-rw-r--r-- net/netfilter/nf_conntrack_bpf.c 6
-rw-r--r-- net/netfilter/nf_conntrack_broadcast.c 1
-rw-r--r-- net/netfilter/nf_conntrack_core.c 76
-rw-r--r-- net/netfilter/nf_conntrack_helper.c 7
-rw-r--r-- net/netfilter/nf_conntrack_labels.c 17
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c 1
-rw-r--r-- net/netfilter/nf_conntrack_proto.c 1
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c 2
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c 17
-rw-r--r-- net/netfilter/nf_flow_table_core.c 17
-rw-r--r-- net/netfilter/nf_log.c 7
-rw-r--r-- net/netfilter/nf_nat_bpf.c 6
-rw-r--r-- net/netfilter/nf_nat_core.c 1
-rw-r--r-- net/netfilter/nf_nat_proto.c 71
-rw-r--r-- net/netfilter/nf_synproxy_core.c 2
-rw-r--r-- net/netfilter/nf_tables_api.c 670
-rw-r--r-- net/netfilter/nf_tables_core.c 8
-rw-r--r-- net/netfilter/nf_tables_trace.c 8
-rw-r--r-- net/netfilter/nfnetlink_osf.c 1
-rw-r--r-- net/netfilter/nfnetlink_queue.c 22
-rw-r--r-- net/netfilter/nft_chain_nat.c 1
-rw-r--r-- net/netfilter/nft_compat.c 17
-rw-r--r-- net/netfilter/nft_ct.c 27
-rw-r--r-- net/netfilter/nft_dynset.c 23
-rw-r--r-- net/netfilter/nft_fib.c 1
-rw-r--r-- net/netfilter/nft_fwd_netdev.c 1
-rw-r--r-- net/netfilter/nft_set_bitmap.c 53
-rw-r--r-- net/netfilter/nft_set_hash.c 117
-rw-r--r-- net/netfilter/nft_set_pipapo.c 204
-rw-r--r-- net/netfilter/nft_set_pipapo.h 22
-rw-r--r-- net/netfilter/nft_set_pipapo_avx2.c 19
-rw-r--r-- net/netfilter/nft_set_rbtree.c 219
-rw-r--r-- net/netfilter/nft_tunnel.c 1
55 files changed, 1007 insertions, 762 deletions
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index ef4e76e5a..3126911f5 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -639,10 +639,10 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
if (ret == 1)
continue;
return ret;
+ case NF_STOLEN:
+ return NF_DROP_GETERR(verdict);
default:
- /* Implicit handling for NF_STOLEN, as well as any other
- * non conventional verdicts.
- */
+ WARN_ON_ONCE(1);
return 0;
}
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 26ab0e961..9523104a9 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -28,6 +28,7 @@
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype MTYPE
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
@@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set)
{
struct mtype *map = set->data;
- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&map->gc);
-
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
ip_set_free(map->members);
@@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t)
add_timer(&map->gc);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+}
+
static const struct ip_set_type_variant mtype = {
.kadt = mtype_kadt,
.uadt = mtype_uadt,
@@ -301,6 +308,7 @@ static const struct ip_set_type_variant mtype = {
.head = mtype_head,
.list = mtype_list,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
};
#endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 4c133e06b..3184cc6be 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1154,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
return ret;
cleanup:
+ set->variant->cancel_gc(set);
set->variant->destroy(set);
put_out:
module_put(set->type->me);
@@ -1182,6 +1183,14 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
+static void
+ip_set_destroy_set_rcu(struct rcu_head *head)
+{
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+ ip_set_destroy_set(set);
+}
+
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const attr[])
{
@@ -1193,8 +1202,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
@@ -1206,8 +1213,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
- read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+ read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
@@ -1221,6 +1230,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
s = ip_set(inst, i);
if (s) {
ip_set(inst, i) = NULL;
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
ip_set_destroy_set(s);
}
}
@@ -1228,6 +1239,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
inst->is_destroyed = false;
} else {
u32 flags = flag_exist(info->nlh);
+ u16 features = 0;
+
+ read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
@@ -1238,10 +1252,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
ret = -IPSET_ERR_BUSY;
goto out;
}
+ features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
-
- ip_set_destroy_set(s);
+ if (features & IPSET_TYPE_NAME) {
+ /* Must wait for flush to be really finished */
+ rcu_barrier();
+ }
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
+ call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
out:
@@ -1394,9 +1414,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
- /* Make sure all readers of the old set pointers are completed. */
- synchronize_rcu();
-
return 0;
}
@@ -2362,6 +2379,7 @@ ip_set_net_exit(struct net *net)
set = ip_set(inst, i);
if (set) {
ip_set(inst, i) = NULL;
+ set->variant->cancel_gc(set);
ip_set_destroy_set(set);
}
}
@@ -2409,8 +2427,11 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
unregister_pernet_subsys(&ip_set_net_ops);
+
+ /* Wait for call_rcu() in destroy */
+ rcu_barrier();
+
pr_debug("these are the famous last words\n");
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 7c2399541..20aad81fc 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -221,6 +221,7 @@ static const union nf_inet_addr zeromask = {};
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
+#undef mtype_cancel_gc
#undef mtype_variant
#undef mtype_data_match
@@ -265,6 +266,7 @@ static const union nf_inet_addr zeromask = {};
#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
@@ -429,7 +431,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference(hbucket(t, i));
+ n = (__force struct hbucket *)hbucket(t, i);
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -449,10 +451,7 @@ mtype_destroy(struct ip_set *set)
struct htype *h = set->data;
struct list_head *l, *lt;
- if (SET_WITH_TIMEOUT(set))
- cancel_delayed_work_sync(&h->gc.dwork);
-
- mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
list_for_each_safe(l, lt, &h->ad) {
list_del(l);
kfree(l);
@@ -598,6 +597,15 @@ mtype_gc_init(struct htable_gc *gc)
queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct htype *h = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ cancel_delayed_work_sync(&h->gc.dwork);
+}
+
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags);
@@ -1440,6 +1448,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
.region_lock = true,
};
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e16263652..6c3f28bc5 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
struct list_set *map = set->data;
struct set_elem *e, *n;
- if (SET_WITH_TIMEOUT(set))
- timer_shutdown_sync(&map->gc);
-
list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
+static void
+list_set_cancel_gc(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ timer_shutdown_sync(&map->gc);
+}
+
static const struct ip_set_type_variant set_variant = {
.kadt = list_set_kadt,
.uadt = list_set_uadt,
@@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = {
.head = list_set_head,
.list = list_set_list,
.same_set = list_set_same_set,
+ .cancel_gc = list_set_cancel_gc,
};
static void
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 3230506ae..a2c16b501 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2450,3 +2450,4 @@ static void __exit ip_vs_cleanup(void)
module_init(ip_vs_init);
module_exit(ip_vs_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Virtual Server");
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 5e6ec32af..75f4c231f 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -270,3 +270,4 @@ static void __exit ip_vs_dh_cleanup(void)
module_init(ip_vs_dh_init);
module_exit(ip_vs_dh_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs destination hashing scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c
index b846cc385..ab117e5bc 100644
--- a/net/netfilter/ipvs/ip_vs_fo.c
+++ b/net/netfilter/ipvs/ip_vs_fo.c
@@ -72,3 +72,4 @@ static void __exit ip_vs_fo_cleanup(void)
module_init(ip_vs_fo_init);
module_exit(ip_vs_fo_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs weighted failover scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index ef1f45e43..f53899d12 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -635,3 +635,4 @@ static void __exit ip_vs_ftp_exit(void)
module_init(ip_vs_ftp_init);
module_exit(ip_vs_ftp_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs ftp helper");
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index cf78ba4ce..8ceec7a2f 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -632,3 +632,4 @@ static void __exit ip_vs_lblc_cleanup(void)
module_init(ip_vs_lblc_init);
module_exit(ip_vs_lblc_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs locality-based least-connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 9eddf118b..0fb647072 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -817,3 +817,4 @@ static void __exit ip_vs_lblcr_cleanup(void)
module_init(ip_vs_lblcr_init);
module_exit(ip_vs_lblcr_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs locality-based least-connection with replication scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 9d34d81fc..c2764505e 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -86,3 +86,4 @@ static void __exit ip_vs_lc_cleanup(void)
module_init(ip_vs_lc_init);
module_exit(ip_vs_lc_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs least connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index f56862a87..ed7f5c889 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -136,3 +136,4 @@ static void __exit ip_vs_nq_cleanup(void)
module_init(ip_vs_nq_init);
module_exit(ip_vs_nq_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs never queue scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c
index c03066fdd..c7708b809 100644
--- a/net/netfilter/ipvs/ip_vs_ovf.c
+++ b/net/netfilter/ipvs/ip_vs_ovf.c
@@ -79,3 +79,4 @@ static void __exit ip_vs_ovf_cleanup(void)
module_init(ip_vs_ovf_init);
module_exit(ip_vs_ovf_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs overflow connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 0ac6705a6..e4ce1d9a6 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -185,3 +185,4 @@ static void __exit ip_vs_sip_cleanup(void)
module_init(ip_vs_sip_init);
module_exit(ip_vs_sip_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs sip helper");
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 38495c6f6..6baa34dff 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -122,4 +122,5 @@ static void __exit ip_vs_rr_cleanup(void)
module_init(ip_vs_rr_init);
module_exit(ip_vs_rr_cleanup);
+MODULE_DESCRIPTION("ipvs round-robin scheduler");
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 7663288e5..a46f99a56 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -137,3 +137,4 @@ static void __exit ip_vs_sed_cleanup(void)
module_init(ip_vs_sed_init);
module_exit(ip_vs_sed_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs shortest expected delay scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index c2028e412..92e77d7a6 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -376,3 +376,4 @@ static void __exit ip_vs_sh_cleanup(void)
module_init(ip_vs_sh_init);
module_exit(ip_vs_sh_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs source hashing scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 4174076c6..eaf9f2ed0 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1298,17 +1298,13 @@ static void set_sock_size(struct sock *sk, int mode, int val)
static void set_mcast_loop(struct sock *sk, u_char loop)
{
/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
- lock_sock(sk);
inet_assign_bit(MC_LOOP, sk, loop);
#ifdef CONFIG_IP_VS_IPV6
- if (sk->sk_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
-
+ if (READ_ONCE(sk->sk_family) == AF_INET6) {
/* IPV6_MULTICAST_LOOP */
- np->mc_loop = loop ? 1 : 0;
+ inet6_assign_bit(MC6_LOOP, sk, loop);
}
#endif
- release_sock(sk);
}
/*
@@ -1320,13 +1316,13 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl)
/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
lock_sock(sk);
- inet->mc_ttl = ttl;
+ WRITE_ONCE(inet->mc_ttl, ttl);
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MULTICAST_HOPS */
- np->mcast_hops = ttl;
+ WRITE_ONCE(np->mcast_hops, ttl);
}
#endif
release_sock(sk);
@@ -1339,13 +1335,13 @@ static void set_mcast_pmtudisc(struct sock *sk, int val)
/* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */
lock_sock(sk);
- inet->pmtudisc = val;
+ WRITE_ONCE(inet->pmtudisc, val);
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MTU_DISCOVER */
- np->pmtudisc = val;
+ WRITE_ONCE(np->pmtudisc, val);
}
#endif
release_sock(sk);
diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c
index 3308e4cc7..8d5419edd 100644
--- a/net/netfilter/ipvs/ip_vs_twos.c
+++ b/net/netfilter/ipvs/ip_vs_twos.c
@@ -137,3 +137,4 @@ static void __exit ip_vs_twos_cleanup(void)
module_init(ip_vs_twos_init);
module_exit(ip_vs_twos_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs power of twos choice scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 09f584b56..9fa500927 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -109,3 +109,4 @@ static void __exit ip_vs_wlc_cleanup(void)
module_init(ip_vs_wlc_init);
module_exit(ip_vs_wlc_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs weighted least connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 1bc7a0789..85ce0d04a 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -263,3 +263,4 @@ static void __exit ip_vs_wrr_cleanup(void)
module_init(ip_vs_wrr_init);
module_exit(ip_vs_wrr_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs weighted round-robin scheduler");
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index b21799d46..475358ec8 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -230,9 +230,7 @@ static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
return 0;
}
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in nf_conntrack BTF");
+__bpf_kfunc_start_defs();
/* bpf_xdp_ct_alloc - Allocate a new CT entry
*
@@ -467,7 +465,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
return nf_ct_change_status_common(nfct, status);
}
-__diag_pop()
+__bpf_kfunc_end_defs();
BTF_SET8_START(nf_ct_kfunc_set)
BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 9fb9b8031..cfa0fe035 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -82,3 +82,4 @@ out:
EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Broadcast connection tracking helper");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9f6f2e643..2e5f3864d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2042,24 +2042,6 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_in);
-/* Alter reply tuple (maybe alter helper). This is for NAT, and is
- implicitly racy: see __nf_conntrack_confirm */
-void nf_conntrack_alter_reply(struct nf_conn *ct,
- const struct nf_conntrack_tuple *newreply)
-{
- struct nf_conn_help *help = nfct_help(ct);
-
- /* Should be unconfirmed, so not in hash table yet */
- WARN_ON(nf_ct_is_confirmed(ct));
-
- nf_ct_dump_tuple(newreply);
-
- ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
- if (ct->master || (help && !hlist_empty(&help->expectations)))
- return;
-}
-EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
-
/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -2187,11 +2169,11 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
if (dataoff <= 0)
- return -1;
+ return NF_DROP;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
l4num, net, &tuple))
- return -1;
+ return NF_DROP;
if (ct->status & IPS_SRC_NAT) {
memcpy(tuple.src.u3.all,
@@ -2211,7 +2193,7 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple);
if (!h)
- return 0;
+ return NF_ACCEPT;
/* Store status bits of the conntrack that is clashing to re-do NAT
* mangling according to what it has been done already to this packet.
@@ -2224,19 +2206,25 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
nat_hook = rcu_dereference(nf_nat_hook);
if (!nat_hook)
- return 0;
+ return NF_ACCEPT;
- if (status & IPS_SRC_NAT &&
- nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC,
- IP_CT_DIR_ORIGINAL) == NF_DROP)
- return -1;
+ if (status & IPS_SRC_NAT) {
+ unsigned int verdict = nat_hook->manip_pkt(skb, ct,
+ NF_NAT_MANIP_SRC,
+ IP_CT_DIR_ORIGINAL);
+ if (verdict != NF_ACCEPT)
+ return verdict;
+ }
- if (status & IPS_DST_NAT &&
- nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST,
- IP_CT_DIR_ORIGINAL) == NF_DROP)
- return -1;
+ if (status & IPS_DST_NAT) {
+ unsigned int verdict = nat_hook->manip_pkt(skb, ct,
+ NF_NAT_MANIP_DST,
+ IP_CT_DIR_ORIGINAL);
+ if (verdict != NF_ACCEPT)
+ return verdict;
+ }
- return 0;
+ return NF_ACCEPT;
}
/* This packet is coming from userspace via nf_queue, complete the packet
@@ -2251,14 +2239,14 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
help = nfct_help(ct);
if (!help)
- return 0;
+ return NF_ACCEPT;
helper = rcu_dereference(help->helper);
if (!helper)
- return 0;
+ return NF_ACCEPT;
if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
- return 0;
+ return NF_ACCEPT;
switch (nf_ct_l3num(ct)) {
case NFPROTO_IPV4:
@@ -2273,42 +2261,44 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
- return 0;
+ return NF_ACCEPT;
break;
}
#endif
default:
- return 0;
+ return NF_ACCEPT;
}
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
- return -1;
+ return NF_DROP;
}
}
/* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(skb) == NF_DROP ? - 1 : 0;
+ return nf_conntrack_confirm(skb);
}
static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
- int err;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
- return 0;
+ return NF_ACCEPT;
if (!nf_ct_is_confirmed(ct)) {
- err = __nf_conntrack_update(net, skb, ct, ctinfo);
- if (err < 0)
- return err;
+ int ret = __nf_conntrack_update(net, skb, ct, ctinfo);
+
+ if (ret != NF_ACCEPT)
+ return ret;
ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return NF_ACCEPT;
}
return nf_confirm_cthelper(skb, ct, ctinfo);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index f22691f83..4ed5878cb 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -194,12 +194,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
struct nf_conntrack_helper *helper = NULL;
struct nf_conn_help *help;
- /* We already got a helper explicitly attached. The function
- * nf_conntrack_alter_reply - in case NAT is in use - asks for looking
- * the helper up again. Since now the user is in full control of
- * making consistent helper configurations, skip this automatic
- * re-lookup, otherwise we'll lose the helper.
- */
+ /* We already got a helper explicitly attached (e.g. nft_ct) */
if (test_bit(IPS_HELPER_BIT, &ct->status))
return 0;
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 6e70e137a..6c46aad23 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -11,8 +11,6 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
-static DEFINE_SPINLOCK(nf_connlabels_lock);
-
static int replace_u32(u32 *address, u32 mask, u32 new)
{
u32 old, tmp;
@@ -60,23 +58,24 @@ EXPORT_SYMBOL_GPL(nf_connlabels_replace);
int nf_connlabels_get(struct net *net, unsigned int bits)
{
+ int v;
+
if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long))
return -ERANGE;
- spin_lock(&nf_connlabels_lock);
- net->ct.labels_used++;
- spin_unlock(&nf_connlabels_lock);
-
BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);
+ v = atomic_inc_return_relaxed(&net->ct.labels_used);
+ WARN_ON_ONCE(v <= 0);
+
return 0;
}
EXPORT_SYMBOL_GPL(nf_connlabels_get);
void nf_connlabels_put(struct net *net)
{
- spin_lock(&nf_connlabels_lock);
- net->ct.labels_used--;
- spin_unlock(&nf_connlabels_lock);
+ int v = atomic_dec_return_relaxed(&net->ct.labels_used);
+
+ WARN_ON_ONCE(v < 0);
}
EXPORT_SYMBOL_GPL(nf_connlabels_put);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 334db2219..fb0ae15e9 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -57,6 +57,7 @@
#include "nf_internals.h"
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("List and change connection tracking table");
struct ctnetlink_list_dump_ctx {
struct nf_conn *last;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index c928ff63b..f36727ed9 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -699,3 +699,4 @@ MODULE_ALIAS("ip_conntrack");
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv4 and IPv6 connection tracking");
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6bd53398..4cc97f971 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
- } else {
+ } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) {
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
pr_debug("Setting vtag %x for new conn OOTB\n",
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4018acb1d..ae493599a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
const struct sk_buff *skb,
unsigned int dataoff,
const struct tcphdr *tcph,
- u32 end, u32 win)
+ u32 end, u32 win,
+ enum ip_conntrack_dir dir)
{
/* SYN-ACK in reply to a SYN
* or SYN from reply direction in simultaneous open.
@@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
* Both sides must send the Window Scale option
* to enable window scaling in either direction.
*/
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ if (dir == IP_CT_DIR_REPLY &&
+ !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
sender->td_scale = 0;
receiver->td_scale = 0;
@@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
if (tcph->syn) {
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
- end, win);
+ end, win, dir);
if (!tcph->ack)
/* Simultaneous open */
return NFCT_TCP_ACCEPT;
@@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
*/
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
- end, win);
+ end, win, dir);
if (dir == IP_CT_DIR_REPLY && !tcph->ack)
return NFCT_TCP_ACCEPT;
@@ -835,7 +837,8 @@ static bool tcp_error(const struct tcphdr *th,
static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff,
- const struct tcphdr *th)
+ const struct tcphdr *th,
+ const struct nf_hook_state *state)
{
enum tcp_conntrack new_state;
struct net *net = nf_ct_net(ct);
@@ -846,7 +849,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
/* Invalid: delete conntrack */
if (new_state >= TCP_CONNTRACK_MAX) {
- pr_debug("nf_ct_tcp: invalid new deleting.\n");
+ tcp_error_log(skb, state, "invalid new");
return false;
}
@@ -980,7 +983,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
if (tcp_error(th, skb, dataoff, state))
return -NF_ACCEPT;
- if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
+ if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th, state))
return -NF_ACCEPT;
spin_lock_bh(&ct->lock);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 920a5a29a..a05713392 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
return 0;
}
+static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route,
+ enum flow_offload_tuple_dir dir)
+{
+ struct dst_entry *dst = route->tuple[dir].dst;
+
+ route->tuple[dir].dst = NULL;
+
+ return dst;
+}
+
static int flow_offload_fill_route(struct flow_offload *flow,
- const struct nf_flow_route *route,
+ struct nf_flow_route *route,
enum flow_offload_tuple_dir dir)
{
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
- struct dst_entry *dst = route->tuple[dir].dst;
+ struct dst_entry *dst = nft_route_dst_fetch(route, dir);
int i, j = 0;
switch (flow_tuple->l3proto) {
@@ -122,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
ETH_ALEN);
flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
+ dst_release(dst);
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
@@ -146,7 +157,7 @@ static void nft_flow_dst_release(struct flow_offload *flow,
}
void flow_offload_route_init(struct flow_offload *flow,
- const struct nf_flow_route *route)
+ struct nf_flow_route *route)
{
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8cc52d2bd..e16f15838 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type)
return;
}
- BUG_ON(loggers[pf][type] == NULL);
-
rcu_read_lock();
logger = rcu_dereference(loggers[pf][type]);
- module_put(logger->me);
+ if (!logger)
+ WARN_ON_ONCE(1);
+ else
+ module_put(logger->me);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_logger_put);
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
index 141ee7783..6e3b2f588 100644
--- a/net/netfilter/nf_nat_bpf.c
+++ b/net/netfilter/nf_nat_bpf.c
@@ -12,9 +12,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in nf_nat BTF");
+__bpf_kfunc_start_defs();
/* bpf_ct_set_nat_info - Set source or destination nat address
*
@@ -54,7 +52,7 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
}
-__diag_pop()
+__bpf_kfunc_end_defs();
BTF_SET8_START(nf_nat_kfunc_set)
BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index c4e0516a8..c3d7ecbc7 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1263,6 +1263,7 @@ static void __exit nf_nat_cleanup(void)
}
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Network address translation core");
module_init(nf_nat_init);
module_exit(nf_nat_cleanup);
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 48cc60084..dc450cc81 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -668,7 +668,7 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
struct flowi fl;
int err;
- err = xfrm_decode_session(skb, &fl, family);
+ err = xfrm_decode_session(net, skb, &fl, family);
if (err < 0)
return err;
@@ -697,6 +697,31 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
}
#endif
+static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport)
+{
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ const struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return false;
+
+ switch (nf_ct_protonum(ct)) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ return false;
+ }
+
+ dir = CTINFO2DIR(ctinfo);
+ if (dir != IP_CT_DIR_ORIGINAL)
+ return false;
+
+ return ct->tuplehash[!dir].tuple.dst.u.all != sport;
+}
+
static unsigned int
nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -707,8 +732,20 @@ nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
ret = nf_nat_ipv4_fn(priv, skb, state);
- if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
- !inet_sk_transparent(sk))
+ if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
+ return ret;
+
+ /* skb has a socket assigned via tcp edemux. We need to check
+ * if nf_nat_ipv4_fn() has mangled the packet in a way that
+ * edemux would not have found this socket.
+ *
+ * This includes both changes to the source address and changes
+ * to the source port, which are both handled by the
+ * nf_nat_ipv4_fn() call above -- long after tcp/udp early demux
+ * might have found a socket for the old (pre-snat) address.
+ */
+ if (saddr != ip_hdr(skb)->saddr ||
+ nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
skb_orphan(skb); /* TCP edemux obtained wrong socket */
return ret;
@@ -938,14 +975,36 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
}
static unsigned int
+nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct in6_addr saddr = ipv6_hdr(skb)->saddr;
+ struct sock *sk = skb->sk;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(priv, skb, state);
+
+ if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
+ return ret;
+
+ /* see nf_nat_ipv4_local_in */
+ if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) ||
+ nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
+ skb_orphan(skb);
+
+ return ret;
+}
+
+static unsigned int
nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- unsigned int ret;
+ unsigned int ret, verdict;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
ret = nf_nat_ipv6_fn(priv, skb, state);
- if (ret != NF_DROP && ret != NF_STOLEN &&
+ verdict = ret & NF_VERDICT_MASK;
+ if (verdict != NF_DROP && verdict != NF_STOLEN &&
ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
skb_dst_drop(skb);
@@ -1051,7 +1110,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_ipv6_fn,
+ .hook = nf_nat_ipv6_local_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 16915f8ee..467671f2d 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -153,7 +153,7 @@ void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info,
struct synproxy_options *opts)
{
opts->tsecr = opts->tsval;
- opts->tsval = tcp_time_stamp_raw() & ~0x3f;
+ opts->tsval = tcp_clock_ms() & ~0x3f;
if (opts->options & NF_SYNPROXY_OPT_WSCALE) {
opts->tsval |= opts->wscale;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4fc8348dd..79e088e6f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -592,9 +592,9 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- nft_setelem_data_deactivate(ctx->net, set, elem);
+ nft_setelem_data_deactivate(ctx->net, set, elem_priv);
return 0;
}
@@ -602,7 +602,7 @@ static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
struct nft_set_elem_catchall {
struct list_head list;
struct rcu_head rcu;
- void *elem;
+ struct nft_elem_priv *elem;
};
static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
@@ -610,7 +610,6 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
- struct nft_set_elem elem;
struct nft_set_ext *ext;
list_for_each_entry(catchall, &set->catchall_list, list) {
@@ -618,8 +617,7 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
- elem.priv = catchall->elem;
- nft_setelem_data_deactivate(ctx->net, set, &elem);
+ nft_setelem_data_deactivate(ctx->net, set, catchall->elem);
break;
}
}
@@ -686,15 +684,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
}
-static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
- struct nft_flowtable *flowtable)
+static struct nft_trans *
+nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_flowtable *flowtable)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type,
sizeof(struct nft_trans_flowtable));
if (trans == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
@@ -703,22 +702,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
nft_trans_flowtable(trans) = flowtable;
nft_trans_commit_list_add_tail(ctx->net, trans);
- return 0;
+ return trans;
}
static int nft_delflowtable(struct nft_ctx *ctx,
struct nft_flowtable *flowtable)
{
- int err;
+ struct nft_trans *trans;
- err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
- if (err < 0)
- return err;
+ trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
nft_deactivate_next(ctx->net, flowtable);
nft_use_dec(&ctx->table->use);
- return err;
+ return 0;
}
static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg)
@@ -1253,6 +1252,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return 0;
err_register_hooks:
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
nft_trans_destroy(trans);
return ret;
}
@@ -2082,7 +2082,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
struct nft_hook *hook;
int err;
- hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
+ hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
if (!hook) {
err = -ENOMEM;
goto err_hook_alloc;
@@ -2505,19 +2505,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
RCU_INIT_POINTER(chain->blob_gen_0, blob);
RCU_INIT_POINTER(chain->blob_gen_1, blob);
- err = nf_tables_register_hook(net, table, chain);
- if (err < 0)
- goto err_destroy_chain;
-
if (!nft_use_inc(&table->use)) {
err = -EMFILE;
- goto err_use;
+ goto err_destroy_chain;
}
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto err_unregister_hook;
+ goto err_trans;
}
nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
@@ -2525,17 +2521,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
nft_trans_chain_policy(trans) = policy;
err = nft_chain_add(table, chain);
- if (err < 0) {
- nft_trans_destroy(trans);
- goto err_unregister_hook;
- }
+ if (err < 0)
+ goto err_chain_add;
+
+ /* This must be LAST to ensure no packets are walking over this chain. */
+ err = nf_tables_register_hook(net, table, chain);
+ if (err < 0)
+ goto err_register_hook;
return 0;
-err_unregister_hook:
+err_register_hook:
+ nft_chain_del(chain);
+err_chain_add:
+ nft_trans_destroy(trans);
+err_trans:
nft_use_dec_restore(&table->use);
-err_use:
- nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
nf_tables_chain_destroy(ctx);
@@ -3327,7 +3328,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_CHAIN] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_RULE_HANDLE] = { .type = NLA_U64 },
- [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_RULE_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
[NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
[NFTA_RULE_POSITION] = { .type = NLA_U64 },
[NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
@@ -3452,20 +3453,21 @@ static void audit_log_rule_reset(const struct nft_table *table,
}
struct nft_rule_dump_ctx {
+ unsigned int s_idx;
char *table;
char *chain;
+ bool reset;
};
static int __nf_tables_dump_rules(struct sk_buff *skb,
unsigned int *idx,
struct netlink_callback *cb,
const struct nft_table *table,
- const struct nft_chain *chain,
- bool reset)
+ const struct nft_chain *chain)
{
+ struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
const struct nft_rule *rule, *prule;
- unsigned int s_idx = cb->args[0];
unsigned int entries = 0;
int ret = 0;
u64 handle;
@@ -3474,7 +3476,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
list_for_each_entry_rcu(rule, &chain->rules, list) {
if (!nft_is_active(net, rule))
goto cont_skip;
- if (*idx < s_idx)
+ if (*idx < ctx->s_idx)
goto cont;
if (prule)
handle = prule->handle;
@@ -3486,7 +3488,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
table->family,
- table, chain, rule, handle, reset) < 0) {
+ table, chain, rule, handle, ctx->reset) < 0) {
ret = 1;
break;
}
@@ -3498,7 +3500,7 @@ cont_skip:
(*idx)++;
}
- if (reset && entries)
+ if (ctx->reset && entries)
audit_log_rule_reset(table, cb->seq, entries);
return ret;
@@ -3508,17 +3510,13 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_rule_dump_ctx *ctx = cb->data;
+ struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
struct nft_table *table;
const struct nft_chain *chain;
unsigned int idx = 0;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nftables_pernet *nft_net;
- bool reset = false;
-
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET)
- reset = true;
rcu_read_lock();
nft_net = nft_pernet(net);
@@ -3528,10 +3526,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
+ if (ctx->table && strcmp(ctx->table, table->name) != 0)
continue;
- if (ctx && ctx->table && ctx->chain) {
+ if (ctx->table && ctx->chain) {
struct rhlist_head *list, *tmp;
list = rhltable_lookup(&table->chains_ht, ctx->chain,
@@ -3543,7 +3541,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
if (!nft_is_active(net, chain))
continue;
__nf_tables_dump_rules(skb, &idx,
- cb, table, chain, reset);
+ cb, table, chain);
break;
}
goto done;
@@ -3551,68 +3549,81 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
list_for_each_entry_rcu(chain, &table->chains, list) {
if (__nf_tables_dump_rules(skb, &idx,
- cb, table, chain, reset))
+ cb, table, chain))
goto done;
}
- if (ctx && ctx->table)
+ if (ctx->table)
break;
}
done:
rcu_read_unlock();
- cb->args[0] = idx;
+ ctx->s_idx = idx;
return skb->len;
}
+static int nf_tables_dumpreset_rules(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
+ int ret;
+
+ /* The mutex is held to prevent two concurrent dump-and-reset calls
+ * from underrunning counters and quotas. The commit_mutex is used for
+ * lack of a better lock; this is not the transaction path.
+ */
+ mutex_lock(&nft_net->commit_mutex);
+ ret = nf_tables_dump_rules(skb, cb);
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return ret;
+}
+
static int nf_tables_dump_rules_start(struct netlink_callback *cb)
{
+ struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
const struct nlattr * const *nla = cb->data;
- struct nft_rule_dump_ctx *ctx = NULL;
- if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
- ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
- if (!ctx)
- return -ENOMEM;
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
- if (nla[NFTA_RULE_TABLE]) {
- ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
- GFP_ATOMIC);
- if (!ctx->table) {
- kfree(ctx);
- return -ENOMEM;
- }
- }
- if (nla[NFTA_RULE_CHAIN]) {
- ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
- GFP_ATOMIC);
- if (!ctx->chain) {
- kfree(ctx->table);
- kfree(ctx);
- return -ENOMEM;
- }
+ if (nla[NFTA_RULE_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC);
+ if (!ctx->table)
+ return -ENOMEM;
+ }
+ if (nla[NFTA_RULE_CHAIN]) {
+ ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], GFP_ATOMIC);
+ if (!ctx->chain) {
+ kfree(ctx->table);
+ return -ENOMEM;
}
}
-
- cb->data = ctx;
return 0;
}
+static int nf_tables_dumpreset_rules_start(struct netlink_callback *cb)
+{
+ struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
+
+ ctx->reset = true;
+
+ return nf_tables_dump_rules_start(cb);
+}
+
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
- struct nft_rule_dump_ctx *ctx = cb->data;
+ struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
- if (ctx) {
- kfree(ctx->table);
- kfree(ctx->chain);
- kfree(ctx);
- }
+ kfree(ctx->table);
+ kfree(ctx->chain);
return 0;
}
/* called with rcu_read_lock held */
-static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
- const struct nlattr * const nla[])
+static struct sk_buff *
+nf_tables_getrule_single(u32 portid, const struct nfnl_info *info,
+ const struct nlattr * const nla[], bool reset)
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
@@ -3622,60 +3633,110 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
struct net *net = info->net;
struct nft_table *table;
struct sk_buff *skb2;
- bool reset = false;
int err;
- if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
- struct netlink_dump_control c = {
- .start= nf_tables_dump_rules_start,
- .dump = nf_tables_dump_rules,
- .done = nf_tables_dump_rules_done,
- .module = THIS_MODULE,
- .data = (void *)nla,
- };
-
- return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
- }
-
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
- return PTR_ERR(table);
+ return ERR_CAST(table);
}
chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
- return PTR_ERR(chain);
+ return ERR_CAST(chain);
}
rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
- return PTR_ERR(rule);
+ return ERR_CAST(rule);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
- return -ENOMEM;
-
- if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET)
- reset = true;
+ return ERR_PTR(-ENOMEM);
- err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_rule_info(skb2, net, portid,
info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
family, table, chain, rule, 0, reset);
- if (err < 0)
- goto err_fill_rule_info;
+ if (err < 0) {
+ kfree_skb(skb2);
+ return ERR_PTR(err);
+ }
- if (reset)
- audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1);
+ return skb2;
+}
- return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ u32 portid = NETLINK_CB(skb).portid;
+ struct net *net = info->net;
+ struct sk_buff *skb2;
-err_fill_rule_info:
- kfree_skb(skb2);
- return err;
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start= nf_tables_dump_rules_start,
+ .dump = nf_tables_dump_rules,
+ .done = nf_tables_dump_rules_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ skb2 = nf_tables_getrule_single(portid, info, nla, false);
+ if (IS_ERR(skb2))
+ return PTR_ERR(skb2);
+
+ return nfnetlink_unicast(skb2, net, portid);
+}
+
+static int nf_tables_getrule_reset(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ u32 portid = NETLINK_CB(skb).portid;
+ struct net *net = info->net;
+ struct sk_buff *skb2;
+ char *buf;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start= nf_tables_dumpreset_rules_start,
+ .dump = nf_tables_dumpreset_rules,
+ .done = nf_tables_dump_rules_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+ rcu_read_unlock();
+ mutex_lock(&nft_net->commit_mutex);
+ skb2 = nf_tables_getrule_single(portid, info, nla, true);
+ mutex_unlock(&nft_net->commit_mutex);
+ rcu_read_lock();
+ module_put(THIS_MODULE);
+
+ if (IS_ERR(skb2))
+ return PTR_ERR(skb2);
+
+ buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
+ nla_len(nla[NFTA_RULE_TABLE]),
+ (char *)nla_data(nla[NFTA_RULE_TABLE]),
+ nft_net->base_seq);
+ audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
+ AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
+ kfree(buf);
+
+ return nfnetlink_unicast(skb2, net, portid);
}
void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
@@ -3758,9 +3819,9 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_ctx *pctx = (struct nft_ctx *)ctx;
const struct nft_data *data;
int err;
@@ -3790,7 +3851,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
- struct nft_set_elem elem;
struct nft_set_ext *ext;
int ret = 0;
@@ -3799,8 +3859,7 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
if (!nft_set_elem_active(ext, genmask))
continue;
- elem.priv = catchall->elem;
- ret = nft_setelem_validate(ctx, set, NULL, &elem);
+ ret = nft_setelem_validate(ctx, set, NULL, catchall->elem);
if (ret < 0)
return ret;
}
@@ -4261,12 +4320,16 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
[NFTA_SET_HANDLE] = { .type = NLA_U64 },
[NFTA_SET_EXPR] = { .type = NLA_NESTED },
- [NFTA_SET_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_SET_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
+};
+
+static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
+ [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
- [NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED },
+ [NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy),
};
static struct nft_set *nft_set_lookup(const struct nft_table *table,
@@ -4705,8 +4768,10 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return PTR_ERR(set);
+ }
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (skb2 == NULL)
@@ -4723,10 +4788,6 @@ err_fill_set_info:
return err;
}
-static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
- [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
-};
-
static int nft_set_desc_concat_parse(const struct nlattr *attr,
struct nft_set_desc *desc)
{
@@ -5269,9 +5330,9 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
static int nft_setelem_data_validate(const struct nft_ctx *ctx,
struct nft_set *set,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
enum nft_registers dreg;
dreg = nft_type_to_reg(set->dtype);
@@ -5284,9 +5345,9 @@ static int nft_setelem_data_validate(const struct nft_ctx *ctx,
static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- return nft_setelem_data_validate(ctx, set, elem);
+ return nft_setelem_data_validate(ctx, set, elem_priv);
}
static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
@@ -5294,7 +5355,6 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
- struct nft_set_elem elem;
struct nft_set_ext *ext;
int ret = 0;
@@ -5303,8 +5363,7 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
- elem.priv = catchall->elem;
- ret = nft_setelem_data_validate(ctx, set, &elem);
+ ret = nft_setelem_data_validate(ctx, set, catchall->elem);
if (ret < 0)
break;
}
@@ -5371,14 +5430,14 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
static void nft_setelem_data_activate(const struct net *net,
const struct nft_set *set,
- struct nft_set_elem *elem);
+ struct nft_elem_priv *elem_priv);
static int nft_mapelem_activate(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- nft_setelem_data_activate(ctx->net, set, elem);
+ nft_setelem_data_activate(ctx->net, set, elem_priv);
return 0;
}
@@ -5388,7 +5447,6 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx,
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
- struct nft_set_elem elem;
struct nft_set_ext *ext;
list_for_each_entry(catchall, &set->catchall_list, list) {
@@ -5396,8 +5454,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
- elem.priv = catchall->elem;
- nft_setelem_data_activate(ctx->net, set, &elem);
+ nft_setelem_data_activate(ctx->net, set, catchall->elem);
break;
}
}
@@ -5524,7 +5581,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING,
.len = NFT_OBJ_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_KEY_END] = { .type = NLA_NESTED },
- [NFTA_SET_ELEM_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -5532,7 +5589,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING,
.len = NFT_SET_MAXNAMELEN - 1 },
- [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_LIST_ELEMENTS] = NLA_POLICY_NESTED_ARRAY(nft_set_elem_policy),
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
@@ -5576,10 +5633,10 @@ nla_put_failure:
static int nf_tables_fill_setelem(struct sk_buff *skb,
const struct nft_set *set,
- const struct nft_set_elem *elem,
+ const struct nft_elem_priv *elem_priv,
bool reset)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
@@ -5665,16 +5722,16 @@ struct nft_set_dump_args {
static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_set_dump_args *args;
if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext))
return 0;
args = container_of(iter, struct nft_set_dump_args, iter);
- return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
+ return nf_tables_fill_setelem(args->skb, set, elem_priv, args->reset);
}
static void audit_log_nft_set_reset(const struct nft_table *table,
@@ -5691,6 +5748,7 @@ static void audit_log_nft_set_reset(const struct nft_table *table,
struct nft_set_dump_ctx {
const struct nft_set *set;
struct nft_ctx ctx;
+ bool reset;
};
static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
@@ -5699,7 +5757,6 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
{
struct nft_set_elem_catchall *catchall;
u8 genmask = nft_genmask_cur(net);
- struct nft_set_elem elem;
struct nft_set_ext *ext;
int ret = 0;
@@ -5709,8 +5766,7 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
nft_set_elem_expired(ext))
continue;
- elem.priv = catchall->elem;
- ret = nf_tables_fill_setelem(skb, set, &elem, reset);
+ ret = nf_tables_fill_setelem(skb, set, catchall->elem, reset);
if (reset && !ret)
audit_log_nft_set_reset(set->table, base_seq, 1);
break;
@@ -5730,7 +5786,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
bool set_found = false;
struct nlmsghdr *nlh;
struct nlattr *nest;
- bool reset = false;
u32 portid, seq;
int event;
@@ -5778,12 +5833,9 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (nest == NULL)
goto nla_put_failure;
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
- reset = true;
-
args.cb = cb;
args.skb = skb;
- args.reset = reset;
+ args.reset = dump_ctx->reset;
args.iter.genmask = nft_genmask_cur(net);
args.iter.skip = cb->args[0];
args.iter.count = 0;
@@ -5793,11 +5845,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (!args.iter.err && args.iter.count == cb->args[0])
args.iter.err = nft_set_catchall_dump(net, skb, set,
- reset, cb->seq);
+ dump_ctx->reset, cb->seq);
nla_nest_end(skb, nest);
nlmsg_end(skb, nlh);
- if (reset && args.iter.count > args.iter.skip)
+ if (dump_ctx->reset && args.iter.count > args.iter.skip)
audit_log_nft_set_reset(table, cb->seq,
args.iter.count - args.iter.skip);
@@ -5835,7 +5887,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
const struct nft_ctx *ctx, u32 seq,
u32 portid, int event, u16 flags,
const struct nft_set *set,
- const struct nft_set_elem *elem,
+ const struct nft_elem_priv *elem_priv,
bool reset)
{
struct nlmsghdr *nlh;
@@ -5857,7 +5909,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
if (nest == NULL)
goto nla_put_failure;
- err = nf_tables_fill_setelem(skb, set, elem, reset);
+ err = nf_tables_fill_setelem(skb, set, elem_priv, reset);
if (err < 0)
goto nla_put_failure;
@@ -6007,7 +6059,7 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
- NFT_MSG_NEWSETELEM, 0, set, &elem,
+ NFT_MSG_NEWSETELEM, 0, set, elem.priv,
reset);
if (err < 0)
goto err_fill_setelem;
@@ -6043,11 +6095,16 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
}
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
+ }
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
+ reset = true;
+
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = nf_tables_dump_set_start,
@@ -6058,6 +6115,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
struct nft_set_dump_ctx dump_ctx = {
.set = set,
.ctx = ctx,
+ .reset = reset,
};
c.data = &dump_ctx;
@@ -6067,9 +6125,6 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
return -EINVAL;
- if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
- reset = true;
-
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_get_set_elem(&ctx, set, attr, reset);
if (err < 0) {
@@ -6088,7 +6143,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
- const struct nft_set_elem *elem,
+ const struct nft_elem_priv *elem_priv,
int event)
{
struct nftables_pernet *nft_net;
@@ -6109,7 +6164,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
- set, elem, false);
+ set, elem_priv, false);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -6184,10 +6239,11 @@ static int nft_set_ext_memcpy(const struct nft_set_ext_tmpl *tmpl, u8 id,
return 0;
}
-void *nft_set_elem_init(const struct nft_set *set,
- const struct nft_set_ext_tmpl *tmpl,
- const u32 *key, const u32 *key_end,
- const u32 *data, u64 timeout, u64 expiration, gfp_t gfp)
+struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const u32 *key, const u32 *key_end,
+ const u32 *data,
+ u64 timeout, u64 expiration, gfp_t gfp)
{
struct nft_set_ext *ext;
void *elem;
@@ -6252,10 +6308,11 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
}
/* Drop references and destroy. Called from gc, dynset and abort path. */
-void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+void nft_set_elem_destroy(const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
bool destroy_expr)
{
- struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_ctx ctx = {
.net = read_pnet(&set->net),
.family = set->table->family,
@@ -6266,10 +6323,10 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
-
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
- kfree(elem);
+
+ kfree(elem_priv);
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
@@ -6277,14 +6334,15 @@ EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
* path via nft_setelem_data_deactivate().
*/
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
- const struct nft_set *set, void *elem)
+ const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv)
{
- struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext));
- kfree(elem);
+ kfree(elem_priv);
}
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
@@ -6379,7 +6437,7 @@ EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
static int nft_setelem_catchall_insert(const struct net *net,
struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **pext)
+ struct nft_elem_priv **priv)
{
struct nft_set_elem_catchall *catchall;
u8 genmask = nft_genmask_next(net);
@@ -6388,7 +6446,7 @@ static int nft_setelem_catchall_insert(const struct net *net,
list_for_each_entry(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (nft_set_elem_active(ext, genmask)) {
- *pext = ext;
+ *priv = catchall->elem;
return -EEXIST;
}
}
@@ -6406,22 +6464,23 @@ static int nft_setelem_catchall_insert(const struct net *net,
static int nft_setelem_insert(const struct net *net,
struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext, unsigned int flags)
+ struct nft_elem_priv **elem_priv,
+ unsigned int flags)
{
int ret;
if (flags & NFT_SET_ELEM_CATCHALL)
- ret = nft_setelem_catchall_insert(net, set, elem, ext);
+ ret = nft_setelem_catchall_insert(net, set, elem, elem_priv);
else
- ret = set->ops->insert(net, set, elem, ext);
+ ret = set->ops->insert(net, set, elem, elem_priv);
return ret;
}
static bool nft_setelem_is_catchall(const struct nft_set *set,
- const struct nft_set_elem *elem)
+ const struct nft_elem_priv *elem_priv)
{
- struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_CATCHALL)
@@ -6431,14 +6490,14 @@ static bool nft_setelem_is_catchall(const struct nft_set *set,
}
static void nft_setelem_activate(struct net *net, struct nft_set *set,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
- if (nft_setelem_is_catchall(set, elem)) {
+ if (nft_setelem_is_catchall(set, elem_priv)) {
nft_set_elem_change_active(net, set, ext);
} else {
- set->ops->activate(net, set, elem);
+ set->ops->activate(net, set, elem_priv);
}
}
@@ -6502,12 +6561,12 @@ static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall)
static void nft_setelem_catchall_remove(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
struct nft_set_elem_catchall *catchall, *next;
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
- if (catchall->elem == elem->priv) {
+ if (catchall->elem == elem_priv) {
nft_setelem_catchall_destroy(catchall);
break;
}
@@ -6516,12 +6575,12 @@ static void nft_setelem_catchall_remove(const struct net *net,
static void nft_setelem_remove(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- if (nft_setelem_is_catchall(set, elem))
- nft_setelem_catchall_remove(net, set, elem);
+ if (nft_setelem_is_catchall(set, elem_priv))
+ nft_setelem_catchall_remove(net, set, elem_priv);
else
- set->ops->remove(net, set, elem);
+ set->ops->remove(net, set, elem_priv);
}
static bool nft_setelem_valid_key_end(const struct nft_set *set,
@@ -6554,13 +6613,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_ext *ext, *ext2;
struct nft_set_elem elem;
struct nft_set_binding *binding;
+ struct nft_elem_priv *elem_priv;
struct nft_object *obj = NULL;
struct nft_userdata *udata;
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
- u64 timeout;
u64 expiration;
+ u64 timeout;
int err, i;
u8 ulen;
@@ -6853,9 +6913,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
ext->genmask = nft_genmask_cur(ctx->net);
- err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
+ err = nft_setelem_insert(ctx->net, set, &elem, &elem_priv, flags);
if (err) {
if (err == -EEXIST) {
+ ext2 = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
@@ -6889,12 +6950,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
}
- nft_trans_elem(trans) = elem;
+ nft_trans_elem_priv(trans) = elem.priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
err_set_full:
- nft_setelem_remove(ctx->net, set, &elem);
+ nft_setelem_remove(ctx->net, set, elem.priv);
err_element_clash:
kfree(trans);
err_elem_free:
@@ -6942,8 +7003,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET],
nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
+ }
if (!list_empty(&set->bindings) &&
(set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
@@ -6993,9 +7056,9 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
static void nft_setelem_data_activate(const struct net *net,
const struct nft_set *set,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_hold(nft_set_ext_data(ext), set->dtype);
@@ -7005,9 +7068,9 @@ static void nft_setelem_data_activate(const struct net *net,
void nft_setelem_data_deactivate(const struct net *net,
const struct nft_set *set,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
@@ -7092,9 +7155,9 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto fail_ops;
- nft_setelem_data_deactivate(ctx->net, set, &elem);
+ nft_setelem_data_deactivate(ctx->net, set, elem.priv);
- nft_trans_elem(trans) = elem;
+ nft_trans_elem_priv(trans) = elem.priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
@@ -7112,36 +7175,29 @@ fail_elem:
static int nft_setelem_flush(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
struct nft_trans *trans;
- int err;
trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
sizeof(struct nft_trans_elem), GFP_ATOMIC);
if (!trans)
return -ENOMEM;
- if (!set->ops->flush(ctx->net, set, elem->priv)) {
- err = -ENOENT;
- goto err1;
- }
+ set->ops->flush(ctx->net, set, elem_priv);
set->ndeact++;
- nft_setelem_data_deactivate(ctx->net, set, elem);
+ nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_set(trans) = set;
- nft_trans_elem(trans) = *elem;
+ nft_trans_elem_priv(trans) = elem_priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
-err1:
- kfree(trans);
- return err;
}
static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
struct nft_set *set,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
struct nft_trans *trans;
@@ -7150,9 +7206,9 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
if (!trans)
return -ENOMEM;
- nft_setelem_data_deactivate(ctx->net, set, elem);
+ nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_set(trans) = set;
- nft_trans_elem(trans) = *elem;
+ nft_trans_elem_priv(trans) = elem_priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
@@ -7163,7 +7219,6 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
- struct nft_set_elem elem;
struct nft_set_ext *ext;
int ret = 0;
@@ -7172,8 +7227,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
- elem.priv = catchall->elem;
- ret = __nft_set_catchall_flush(ctx, set, &elem);
+ ret = __nft_set_catchall_flush(ctx, set, catchall->elem);
if (ret < 0)
break;
nft_set_elem_change_active(ctx->net, set, ext);
@@ -7218,8 +7272,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
}
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
+ }
if (nft_set_is_anonymous(set))
return -EOPNOTSUPP;
@@ -7415,11 +7471,15 @@ nla_put_failure:
return -1;
}
-static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
+static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
{
const struct nft_object_type *type;
list_for_each_entry(type, &nf_tables_objects, list) {
+ if (type->family != NFPROTO_UNSPEC &&
+ type->family != family)
+ continue;
+
if (objtype == type->type)
return type;
}
@@ -7427,11 +7487,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
}
static const struct nft_object_type *
-nft_obj_type_get(struct net *net, u32 objtype)
+nft_obj_type_get(struct net *net, u32 objtype, u8 family)
{
const struct nft_object_type *type;
- type = __nft_obj_type_get(objtype);
+ type = __nft_obj_type_get(objtype, family);
if (type != NULL && try_module_get(type->owner))
return type;
@@ -7524,7 +7584,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- type = __nft_obj_type_get(objtype);
+ type = __nft_obj_type_get(objtype, family);
if (WARN_ON_ONCE(!type))
return -ENOENT;
@@ -7538,7 +7598,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (!nft_use_inc(&table->use))
return -EMFILE;
- type = nft_obj_type_get(net, objtype);
+ type = nft_obj_type_get(net, objtype, family);
if (IS_ERR(type)) {
err = PTR_ERR(type);
goto err_type;
@@ -7649,28 +7709,26 @@ static void audit_log_obj_reset(const struct nft_table *table,
kfree(buf);
}
-struct nft_obj_filter {
+struct nft_obj_dump_ctx {
+ unsigned int s_idx;
char *table;
u32 type;
+ bool reset;
};
static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_table *table;
- unsigned int idx = 0, s_idx = cb->args[0];
- struct nft_obj_filter *filter = cb->data;
+ struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nftables_pernet *nft_net;
+ const struct nft_table *table;
unsigned int entries = 0;
struct nft_object *obj;
- bool reset = false;
+ unsigned int idx = 0;
int rc = 0;
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
- reset = true;
-
rcu_read_lock();
nft_net = nft_pernet(net);
cb->seq = READ_ONCE(nft_net->base_seq);
@@ -7683,17 +7741,12 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
list_for_each_entry_rcu(obj, &table->objects, list) {
if (!nft_is_active(net, obj))
goto cont;
- if (idx < s_idx)
+ if (idx < ctx->s_idx)
goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (filter && filter->table &&
- strcmp(filter->table, table->name))
+ if (ctx->table && strcmp(ctx->table, table->name))
goto cont;
- if (filter &&
- filter->type != NFT_OBJECT_UNSPEC &&
- obj->ops->type->type != filter->type)
+ if (ctx->type != NFT_OBJECT_UNSPEC &&
+ obj->ops->type->type != ctx->type)
goto cont;
rc = nf_tables_fill_obj_info(skb, net,
@@ -7702,7 +7755,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
NFT_MSG_NEWOBJ,
NLM_F_MULTI | NLM_F_APPEND,
table->family, table,
- obj, reset);
+ obj, ctx->reset);
if (rc < 0)
break;
@@ -7711,51 +7764,44 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
cont:
idx++;
}
- if (reset && entries)
+ if (ctx->reset && entries)
audit_log_obj_reset(table, nft_net->base_seq, entries);
if (rc < 0)
break;
}
rcu_read_unlock();
- cb->args[0] = idx;
+ ctx->s_idx = idx;
return skb->len;
}
static int nf_tables_dump_obj_start(struct netlink_callback *cb)
{
+ struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
const struct nlattr * const *nla = cb->data;
- struct nft_obj_filter *filter = NULL;
- if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) {
- filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
- if (!filter)
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+ if (nla[NFTA_OBJ_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
+ if (!ctx->table)
return -ENOMEM;
+ }
- if (nla[NFTA_OBJ_TABLE]) {
- filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
- if (!filter->table) {
- kfree(filter);
- return -ENOMEM;
- }
- }
+ if (nla[NFTA_OBJ_TYPE])
+ ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- if (nla[NFTA_OBJ_TYPE])
- filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- }
+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+ ctx->reset = true;
- cb->data = filter;
return 0;
}
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
- struct nft_obj_filter *filter = cb->data;
+ struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
- if (filter) {
- kfree(filter->table);
- kfree(filter);
- }
+ kfree(ctx->table);
return 0;
}
@@ -8329,9 +8375,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
u8 family = info->nfmsg->nfgen_family;
const struct nf_flowtable_type *type;
struct nft_flowtable *flowtable;
- struct nft_hook *hook, *next;
struct net *net = info->net;
struct nft_table *table;
+ struct nft_trans *trans;
struct nft_ctx ctx;
int err;
@@ -8411,34 +8457,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable,
extack, true);
if (err < 0)
- goto err4;
+ goto err_flowtable_parse_hooks;
list_splice(&flowtable_hook.list, &flowtable->hook_list);
flowtable->data.priority = flowtable_hook.priority;
flowtable->hooknum = flowtable_hook.num;
+ trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto err_flowtable_trans;
+ }
+
+ /* This must be LAST to ensure no packets are walking over this flowtable. */
err = nft_register_flowtable_net_hooks(ctx.net, table,
&flowtable->hook_list,
flowtable);
- if (err < 0) {
- nft_hooks_destroy(&flowtable->hook_list);
- goto err4;
- }
-
- err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
- goto err5;
+ goto err_flowtable_hooks;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
return 0;
-err5:
- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- nft_unregister_flowtable_hook(net, flowtable, hook);
- list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
- }
-err4:
+
+err_flowtable_hooks:
+ nft_trans_destroy(trans);
+err_flowtable_trans:
+ nft_hooks_destroy(&flowtable->hook_list);
+err_flowtable_parse_hooks:
flowtable->data.type->free(&flowtable->data);
err3:
module_put(type->owner);
@@ -8722,6 +8768,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const nla[])
{
+ struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
@@ -8747,13 +8794,17 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
genmask, 0);
- if (IS_ERR(table))
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
return PTR_ERR(table);
+ }
flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
genmask);
- if (IS_ERR(flowtable))
+ if (IS_ERR(flowtable)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
return PTR_ERR(flowtable);
+ }
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
@@ -9009,7 +9060,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_rule_policy,
},
[NFT_MSG_GETRULE_RESET] = {
- .call = nf_tables_getrule,
+ .call = nf_tables_getrule_reset,
.type = NFNL_CB_RCU,
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
@@ -9259,7 +9310,7 @@ static void nft_commit_release(struct nft_trans *trans)
case NFT_MSG_DESTROYSETELEM:
nf_tables_set_elem_destroy(&trans->ctx,
nft_trans_elem_set(trans),
- nft_trans_elem(trans).priv);
+ nft_trans_elem_priv(trans));
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
@@ -9488,16 +9539,12 @@ void nft_chain_del(struct nft_chain *chain)
static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
struct nft_trans_gc *trans)
{
- void **priv = trans->priv;
+ struct nft_elem_priv **priv = trans->priv;
unsigned int i;
for (i = 0; i < trans->count; i++) {
- struct nft_set_elem elem = {
- .priv = priv[i],
- };
-
- nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
- nft_setelem_remove(ctx->net, trans->set, &elem);
+ nft_setelem_data_deactivate(ctx->net, trans->set, priv[i]);
+ nft_setelem_remove(ctx->net, trans->set, priv[i]);
}
}
@@ -9510,7 +9557,7 @@ void nft_trans_gc_destroy(struct nft_trans_gc *trans)
static void nft_trans_gc_trans_free(struct rcu_head *rcu)
{
- struct nft_set_elem elem = {};
+ struct nft_elem_priv *elem_priv;
struct nft_trans_gc *trans;
struct nft_ctx ctx = {};
unsigned int i;
@@ -9519,11 +9566,11 @@ static void nft_trans_gc_trans_free(struct rcu_head *rcu)
ctx.net = read_pnet(&trans->set->net);
for (i = 0; i < trans->count; i++) {
- elem.priv = trans->priv[i];
- if (!nft_setelem_is_catchall(trans->set, &elem))
+ elem_priv = trans->priv[i];
+ if (!nft_setelem_is_catchall(trans->set, elem_priv))
atomic_dec(&trans->set->nelems);
- nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+ nf_tables_set_elem_destroy(&ctx, trans->set, elem_priv);
}
nft_trans_gc_destroy(trans);
@@ -9700,8 +9747,9 @@ dead_elem:
struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
{
struct nft_set_elem_catchall *catchall, *next;
+ u64 tstamp = nft_net_tstamp(gc->net);
const struct nft_set *set = gc->set;
- struct nft_set_elem elem;
+ struct nft_elem_priv *elem_priv;
struct nft_set_ext *ext;
WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net));
@@ -9709,19 +9757,17 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_expired(ext))
+ if (!__nft_set_elem_expired(ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
return NULL;
- memset(&elem, 0, sizeof(elem));
- elem.priv = catchall->elem;
-
- nft_setelem_data_deactivate(gc->net, gc->set, &elem);
+ elem_priv = catchall->elem;
+ nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
nft_setelem_catchall_destroy(catchall);
- nft_trans_gc_elem_add(gc, elem.priv);
+ nft_trans_gc_elem_add(gc, elem_priv);
}
return gc;
@@ -10105,9 +10151,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = (struct nft_trans_elem *)trans->data;
- nft_setelem_activate(net, te->set, &te->elem);
+ nft_setelem_activate(net, te->set, te->elem_priv);
nf_tables_setelem_notify(&trans->ctx, te->set,
- &te->elem,
+ te->elem_priv,
NFT_MSG_NEWSETELEM);
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
@@ -10121,10 +10167,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
te = (struct nft_trans_elem *)trans->data;
nf_tables_setelem_notify(&trans->ctx, te->set,
- &te->elem,
+ te->elem_priv,
trans->msg_type);
- nft_setelem_remove(net, te->set, &te->elem);
- if (!nft_setelem_is_catchall(te->set, &te->elem)) {
+ nft_setelem_remove(net, te->set, te->elem_priv);
+ if (!nft_setelem_is_catchall(te->set, te->elem_priv)) {
atomic_dec(&te->set->nelems);
te->set->ndeact--;
}
@@ -10244,7 +10290,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem(trans).priv, true);
+ nft_trans_elem_priv(trans), true);
break;
case NFT_MSG_NEWOBJ:
nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
@@ -10392,8 +10438,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
}
te = (struct nft_trans_elem *)trans->data;
- nft_setelem_remove(net, te->set, &te->elem);
- if (!nft_setelem_is_catchall(te->set, &te->elem))
+ nft_setelem_remove(net, te->set, te->elem_priv);
+ if (!nft_setelem_is_catchall(te->set, te->elem_priv))
atomic_dec(&te->set->nelems);
if (te->set->ops->abort &&
@@ -10406,9 +10452,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSETELEM:
te = (struct nft_trans_elem *)trans->data;
- nft_setelem_data_activate(net, te->set, &te->elem);
- nft_setelem_activate(net, te->set, &te->elem);
- if (!nft_setelem_is_catchall(te->set, &te->elem))
+ nft_setelem_data_activate(net, te->set, te->elem_priv);
+ nft_setelem_activate(net, te->set, te->elem_priv);
+ if (!nft_setelem_is_catchall(te->set, te->elem_priv))
te->set->ndeact--;
if (te->set->ops->abort &&
@@ -10497,6 +10543,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
bool genid_ok;
mutex_lock(&nft_net->commit_mutex);
+ nft_net->tstamp = get_jiffies_64();
genid_ok = genid == 0 || nft_net->base_seq == genid;
if (!genid_ok)
@@ -10584,9 +10631,9 @@ static int nft_check_loops(const struct nft_ctx *ctx,
static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
@@ -11393,4 +11440,5 @@ module_exit(nf_tables_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Framework for packet filtering and classification");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 711c22ab7..c3e635364 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -115,7 +115,7 @@ static noinline void __nft_trace_verdict(const struct nft_pktinfo *pkt,
{
enum nft_trace_types type;
- switch (regs->verdict.code) {
+ switch (regs->verdict.code & NF_VERDICT_MASK) {
case NFT_CONTINUE:
case NFT_RETURN:
type = NFT_TRACETYPE_RETURN;
@@ -308,10 +308,11 @@ next_rule:
switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
- case NF_DROP:
case NF_QUEUE:
case NF_STOLEN:
return regs.verdict.code;
+ case NF_DROP:
+ return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM);
}
switch (regs.verdict.code) {
@@ -342,6 +343,9 @@ next_rule:
if (static_branch_unlikely(&nft_counters_enabled))
nft_update_chain_stats(basechain, pkt);
+ if (nft_base_chain(basechain)->policy == NF_DROP)
+ return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM);
+
return nft_base_chain(basechain)->policy;
}
EXPORT_SYMBOL_GPL(nft_do_chain);
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 6d41c0bd3..a83637e3f 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -258,17 +258,21 @@ void nft_trace_notify(const struct nft_pktinfo *pkt,
case __NFT_TRACETYPE_MAX:
break;
case NFT_TRACETYPE_RETURN:
- case NFT_TRACETYPE_RULE:
+ case NFT_TRACETYPE_RULE: {
+ unsigned int v;
+
if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict))
goto nla_put_failure;
/* pkt->skb undefined iff NF_STOLEN, disable dump */
- if (verdict->code == NF_STOLEN)
+ v = verdict->code & NF_VERDICT_MASK;
+ if (v == NF_STOLEN)
info->packet_dumped = true;
else
mark = pkt->skb->mark;
break;
+ }
case NFT_TRACETYPE_POLICY:
mark = pkt->skb->mark;
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 50723ba08..c0fc43199 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -447,4 +447,5 @@ module_init(nfnl_osf_init);
module_exit(nfnl_osf_fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Passive OS fingerprint matching");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 556bc902a..5cf38fc0a 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -228,19 +228,29 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
const struct nf_ct_hook *ct_hook;
- int err;
if (verdict == NF_ACCEPT ||
verdict == NF_REPEAT ||
verdict == NF_STOP) {
+ unsigned int ct_verdict = verdict;
+
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
- if (ct_hook) {
- err = ct_hook->update(entry->state.net, entry->skb);
- if (err < 0)
- verdict = NF_DROP;
- }
+ if (ct_hook)
+ ct_verdict = ct_hook->update(entry->state.net, entry->skb);
rcu_read_unlock();
+
+ switch (ct_verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ /* follow userspace verdict, could be REPEAT */
+ break;
+ case NF_STOLEN:
+ nf_queue_entry_free(entry);
+ return;
+ default:
+ verdict = ct_verdict & NF_VERDICT_MASK;
+ break;
+ }
}
nf_reinject(entry, verdict);
}
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index 98e494610..40e230d8b 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -137,6 +137,7 @@ module_init(nft_chain_nat_init);
module_exit(nft_chain_nat_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("nftables network address translation support");
#ifdef CONFIG_NF_TABLES_IPV4
MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
#endif
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f0eeda97b..1f9474fef 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
[NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_TARGET_REV] = { .type = NLA_U32 },
+ [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TARGET_INFO] = { .type = NLA_BINARY },
};
@@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
{
struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+ u32 l4proto;
u32 flags;
int err;
@@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
return -EINVAL;
flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
- if (flags & ~NFT_RULE_COMPAT_F_MASK)
+ if (flags & NFT_RULE_COMPAT_F_UNUSED ||
+ flags & ~NFT_RULE_COMPAT_F_MASK)
return -EINVAL;
if (flags & NFT_RULE_COMPAT_F_INV)
*inv = true;
- *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ if (l4proto > U16_MAX)
+ return -EINVAL;
+
+ *proto = l4proto;
+
return 0;
}
@@ -419,7 +426,7 @@ static void nft_match_eval(const struct nft_expr *expr,
static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
[NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_MATCH_REV] = { .type = NLA_U32 },
+ [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_MATCH_INFO] = { .type = NLA_BINARY },
};
@@ -724,7 +731,7 @@ out_put:
static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
[NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
.len = NFT_COMPAT_NAME_MAX-1 },
- [NFTA_COMPAT_REV] = { .type = NLA_U32 },
+ [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
};
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 86bb9d779..bfd3e5a14 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
break;
#endif
case NFT_CT_ID:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+
len = sizeof(u32);
break;
default:
@@ -1250,7 +1253,31 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
if (tb[NFTA_CT_EXPECT_L3PROTO])
priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO]));
+ switch (priv->l3num) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ if (priv->l3num != ctx->family)
+ return -EINVAL;
+
+ fallthrough;
+ case NFPROTO_INET:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
+ switch (priv->l4proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_DCCP:
+ case IPPROTO_SCTP:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 629a91a8c..c09dba573 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -44,33 +44,34 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
return 0;
}
-static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
- struct nft_regs *regs)
+static struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
+ const struct nft_expr *expr,
+ struct nft_regs *regs)
{
const struct nft_dynset *priv = nft_expr_priv(expr);
struct nft_set_ext *ext;
+ void *elem_priv;
u64 timeout;
- void *elem;
if (!atomic_add_unless(&set->nelems, 1, set->size))
return NULL;
timeout = priv->timeout ? : set->timeout;
- elem = nft_set_elem_init(set, &priv->tmpl,
- &regs->data[priv->sreg_key], NULL,
- &regs->data[priv->sreg_data],
- timeout, 0, GFP_ATOMIC);
- if (IS_ERR(elem))
+ elem_priv = nft_set_elem_init(set, &priv->tmpl,
+ &regs->data[priv->sreg_key], NULL,
+ &regs->data[priv->sreg_data],
+ timeout, 0, GFP_ATOMIC);
+ if (IS_ERR(elem_priv))
goto err1;
- ext = nft_set_elem_ext(set, elem);
+ ext = nft_set_elem_ext(set, elem_priv);
if (priv->num_exprs && nft_dynset_expr_setup(priv, ext) < 0)
goto err2;
- return elem;
+ return elem_priv;
err2:
- nft_set_elem_destroy(set, elem, false);
+ nft_set_elem_destroy(set, elem_priv, false);
err1:
if (set->size)
atomic_dec(&set->nelems);
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index ca905aa82..37cfe6dd7 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -208,4 +208,5 @@ bool nft_fib_reduce(struct nft_regs_track *track,
EXPORT_SYMBOL_GPL(nft_fib_reduce);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Query routing table from nftables");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index a5268e6dd..358e742af 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -270,4 +270,5 @@ module_exit(nft_fwd_netdev_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("nftables netdev packet forwarding support");
MODULE_ALIAS_NFT_AF_EXPR(5, "fwd");
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 1e5e7a181..32df7a168 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -13,6 +13,7 @@
#include <net/netfilter/nf_tables_core.h>
struct nft_bitmap_elem {
+ struct nft_elem_priv priv;
struct list_head head;
struct nft_set_ext ext;
};
@@ -104,8 +105,9 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
return NULL;
}
-static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_bitmap_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
const struct nft_bitmap *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -116,23 +118,23 @@ static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
!nft_set_elem_active(&be->ext, genmask))
continue;
- return be;
+ return &be->priv;
}
return ERR_PTR(-ENOENT);
}
static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext)
+ struct nft_elem_priv **elem_priv)
{
+ struct nft_bitmap_elem *new = nft_elem_priv_cast(elem->priv), *be;
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_bitmap_elem *new = elem->priv, *be;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
be = nft_bitmap_elem_find(set, new, genmask);
if (be) {
- *ext = &be->ext;
+ *elem_priv = &be->priv;
return -EEXIST;
}
@@ -144,12 +146,11 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
return 0;
}
-static void nft_bitmap_remove(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_bitmap_remove(const struct net *net, const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
+ struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv);
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_bitmap_elem *be = elem->priv;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
@@ -161,10 +162,10 @@ static void nft_bitmap_remove(const struct net *net,
static void nft_bitmap_activate(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
+ struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv);
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_bitmap_elem *be = elem->priv;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
@@ -174,28 +175,27 @@ static void nft_bitmap_activate(const struct net *net,
nft_set_elem_change_active(net, set, &be->ext);
}
-static bool nft_bitmap_flush(const struct net *net,
- const struct nft_set *set, void *_be)
+static void nft_bitmap_flush(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
+ struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv);
struct nft_bitmap *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
- struct nft_bitmap_elem *be = _be;
u32 idx, off;
nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
/* Enter 10 state, similar to deactivation. */
priv->bitmap[idx] &= ~(genmask << off);
nft_set_elem_change_active(net, set, &be->ext);
-
- return true;
}
-static void *nft_bitmap_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_bitmap_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
+ struct nft_bitmap_elem *this = nft_elem_priv_cast(elem->priv), *be;
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_bitmap_elem *this = elem->priv, *be;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
@@ -209,7 +209,7 @@ static void *nft_bitmap_deactivate(const struct net *net,
priv->bitmap[idx] &= ~(genmask << off);
nft_set_elem_change_active(net, set, &be->ext);
- return be;
+ return &be->priv;
}
static void nft_bitmap_walk(const struct nft_ctx *ctx,
@@ -218,7 +218,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
{
const struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be;
- struct nft_set_elem elem;
list_for_each_entry_rcu(be, &priv->list, head) {
if (iter->count < iter->skip)
@@ -226,9 +225,7 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
if (!nft_set_elem_active(&be->ext, iter->genmask))
goto cont;
- elem.priv = be;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &be->priv);
if (iter->err < 0)
return;
@@ -265,6 +262,8 @@ static int nft_bitmap_init(const struct nft_set *set,
{
struct nft_bitmap *priv = nft_set_priv(set);
+ BUILD_BUG_ON(offsetof(struct nft_bitmap_elem, priv) != 0);
+
INIT_LIST_HEAD(&priv->list);
priv->bitmap_size = nft_bitmap_size(set->klen);
@@ -278,7 +277,7 @@ static void nft_bitmap_destroy(const struct nft_ctx *ctx,
struct nft_bitmap_elem *be, *n;
list_for_each_entry_safe(be, n, &priv->list, head)
- nf_tables_set_elem_destroy(ctx, set, be);
+ nf_tables_set_elem_destroy(ctx, set, &be->priv);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 2013de934..6968a3b34 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -27,6 +27,7 @@ struct nft_rhash {
};
struct nft_rhash_elem {
+ struct nft_elem_priv priv;
struct rhash_head node;
struct nft_set_ext ext;
};
@@ -35,6 +36,7 @@ struct nft_rhash_cmp_arg {
const struct nft_set *set;
const u32 *key;
u8 genmask;
+ u64 tstamp;
};
static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
@@ -61,7 +63,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
return 1;
if (nft_set_elem_is_dead(&he->ext))
return 1;
- if (nft_set_elem_expired(&he->ext))
+ if (__nft_set_elem_expired(&he->ext, x->tstamp))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
return 1;
@@ -86,6 +88,7 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -95,8 +98,9 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
return !!he;
}
-static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_rhash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he;
@@ -104,17 +108,19 @@ static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
- return he;
+ return &he->priv;
return ERR_PTR(-ENOENT);
}
static bool nft_rhash_update(struct nft_set *set, const u32 *key,
- void *(*new)(struct nft_set *,
+ struct nft_elem_priv *
+ (*new)(struct nft_set *,
const struct nft_expr *,
struct nft_regs *regs),
const struct nft_expr *expr,
@@ -123,20 +129,23 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he, *prev;
+ struct nft_elem_priv *elem_priv;
struct nft_rhash_cmp_arg arg = {
.genmask = NFT_GENMASK_ANY,
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
goto out;
- he = new(set, expr, regs);
- if (he == NULL)
+ elem_priv = new(set, expr, regs);
+ if (!elem_priv)
goto err1;
+ he = nft_elem_priv_cast(elem_priv);
prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
nft_rhash_params);
if (IS_ERR(prev))
@@ -144,7 +153,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
/* Another cpu may race to insert the element with the same key */
if (prev) {
- nft_set_elem_destroy(set, he, true);
+ nft_set_elem_destroy(set, &he->priv, true);
atomic_dec(&set->nelems);
he = prev;
}
@@ -154,7 +163,7 @@ out:
return true;
err2:
- nft_set_elem_destroy(set, he, true);
+ nft_set_elem_destroy(set, &he->priv, true);
atomic_dec(&set->nelems);
err1:
return false;
@@ -162,14 +171,15 @@ err1:
static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext)
+ struct nft_elem_priv **elem_priv)
{
+ struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv);
struct nft_rhash *priv = nft_set_priv(set);
- struct nft_rhash_elem *he = elem->priv;
struct nft_rhash_cmp_arg arg = {
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
struct nft_rhash_elem *prev;
@@ -178,33 +188,32 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
if (IS_ERR(prev))
return PTR_ERR(prev);
if (prev) {
- *ext = &prev->ext;
+ *elem_priv = &prev->priv;
return -EEXIST;
}
return 0;
}
static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_rhash_elem *he = elem->priv;
+ struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
}
-static bool nft_rhash_flush(const struct net *net,
- const struct nft_set *set, void *priv)
+static void nft_rhash_flush(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
- struct nft_rhash_elem *he = priv;
+ struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
-
- return true;
}
-static void *nft_rhash_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_rhash_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he;
@@ -212,6 +221,7 @@ static void *nft_rhash_deactivate(const struct net *net,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
rcu_read_lock();
@@ -221,15 +231,15 @@ static void *nft_rhash_deactivate(const struct net *net,
rcu_read_unlock();
- return he;
+ return &he->priv;
}
static void nft_rhash_remove(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
+ struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
struct nft_rhash *priv = nft_set_priv(set);
- struct nft_rhash_elem *he = elem->priv;
rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
}
@@ -260,7 +270,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he;
struct rhashtable_iter hti;
- struct nft_set_elem elem;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
@@ -280,9 +289,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
- elem.priv = he;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
break;
@@ -406,6 +413,8 @@ static int nft_rhash_init(const struct nft_set *set,
struct rhashtable_params params = nft_rhash_params;
int err;
+ BUILD_BUG_ON(offsetof(struct nft_rhash_elem, priv) != 0);
+
params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT;
params.key_len = set->klen;
@@ -428,8 +437,9 @@ struct nft_rhash_ctx {
static void nft_rhash_elem_destroy(void *ptr, void *arg)
{
struct nft_rhash_ctx *rhash_ctx = arg;
+ struct nft_rhash_elem *he = ptr;
- nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
+ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, &he->priv);
}
static void nft_rhash_destroy(const struct nft_ctx *ctx,
@@ -476,6 +486,7 @@ struct nft_hash {
};
struct nft_hash_elem {
+ struct nft_elem_priv priv;
struct hlist_node node;
struct nft_set_ext ext;
};
@@ -501,8 +512,9 @@ bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
return false;
}
-static void *nft_hash_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_hash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -514,7 +526,7 @@ static void *nft_hash_get(const struct net *net, const struct nft_set *set,
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
nft_set_elem_active(&he->ext, genmask))
- return he;
+ return &he->priv;
}
return ERR_PTR(-ENOENT);
}
@@ -562,9 +574,9 @@ static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,
static int nft_hash_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext)
+ struct nft_elem_priv **elem_priv)
{
- struct nft_hash_elem *this = elem->priv, *he;
+ struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he;
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
u32 hash;
@@ -574,7 +586,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set,
if (!memcmp(nft_set_ext_key(&this->ext),
nft_set_ext_key(&he->ext), set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
- *ext = &he->ext;
+ *elem_priv = &he->priv;
return -EEXIST;
}
}
@@ -583,28 +595,28 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set,
}
static void nft_hash_activate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_hash_elem *he = elem->priv;
+ struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
}
-static bool nft_hash_flush(const struct net *net,
- const struct nft_set *set, void *priv)
+static void nft_hash_flush(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
- struct nft_hash_elem *he = priv;
+ struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
- return true;
}
-static void *nft_hash_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_hash_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
+ struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he;
struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *this = elem->priv, *he;
u8 genmask = nft_genmask_next(net);
u32 hash;
@@ -614,7 +626,7 @@ static void *nft_hash_deactivate(const struct net *net,
set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
nft_set_elem_change_active(net, set, &he->ext);
- return he;
+ return &he->priv;
}
}
return NULL;
@@ -622,9 +634,9 @@ static void *nft_hash_deactivate(const struct net *net,
static void nft_hash_remove(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_hash_elem *he = elem->priv;
+ struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
hlist_del_rcu(&he->node);
}
@@ -634,7 +646,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
- struct nft_set_elem elem;
int i;
for (i = 0; i < priv->buckets; i++) {
@@ -644,9 +655,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
- elem.priv = he;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
return;
cont:
@@ -685,7 +694,7 @@ static void nft_hash_destroy(const struct nft_ctx *ctx,
for (i = 0; i < priv->buckets; i++) {
hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
hlist_del_rcu(&he->node);
- nf_tables_set_elem_destroy(ctx, set, he);
+ nf_tables_set_elem_destroy(ctx, set, &he->priv);
}
}
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 3ff31043f..3089c4ca8 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -342,9 +342,6 @@
#include "nft_set_pipapo_avx2.h"
#include "nft_set_pipapo.h"
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
-
/**
* pipapo_refill() - For each set bit, set bits from selected mapping table item
* @map: Bitmap to be scanned for set bits
@@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
@@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
local_bh_disable();
- map_index = raw_cpu_read(nft_pipapo_scratch_index);
-
m = rcu_dereference(priv->match);
if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
goto out;
- res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
- fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
+ scratch = *raw_cpu_ptr(m->scratch);
+
+ map_index = scratch->map_index;
+
+ res_map = scratch->map + (map_index ? m->bsize_max : 0);
+ fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
@@ -460,7 +460,7 @@ next_match:
b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
last);
if (b < 0) {
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return false;
@@ -477,7 +477,7 @@ next_match:
* current inactive bitmap is clean and can be reused as
* *next* bitmap (not initial) for the next packet.
*/
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return true;
@@ -504,6 +504,7 @@ out:
* @set: nftables API set representation
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
+ * @tstamp: timestamp to check for expired elements
*
* This is essentially the same as the lookup function, except that it matches
* key data against the uncommitted copy and doesn't use preallocated maps for
@@ -513,7 +514,8 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
- const u8 *data, u8 genmask)
+ const u8 *data, u8 genmask,
+ u64 tstamp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
struct nft_pipapo *priv = nft_set_priv(set);
@@ -566,7 +568,7 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext))
+ if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
@@ -599,11 +601,18 @@ out:
* @elem: nftables API element representation containing key data
* @flags: Unused
*/
-static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_pipapo_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
- return pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ struct nft_pipapo_elem *e;
+
+ e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
+ nft_genmask_cur(net), get_jiffies_64());
+ if (IS_ERR(e))
+ return ERR_CAST(e);
+
+ return &e->priv;
}
/**
@@ -1102,6 +1111,25 @@ static void pipapo_map(struct nft_pipapo_match *m,
}
/**
+ * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * @m: Matching data
+ * @cpu: CPU number
+ */
+static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
+{
+ struct nft_pipapo_scratch *s;
+ void *mem;
+
+ s = *per_cpu_ptr(m->scratch, cpu);
+ if (!s)
+ return;
+
+ mem = s;
+ mem -= s->align_off;
+ kfree(mem);
+}
+
+/**
* pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
* @clone: Copy of matching data with pending insertions and deletions
* @bsize_max: Maximum bucket size, scratch maps cover two buckets
@@ -1114,12 +1142,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
int i;
for_each_possible_cpu(i) {
- unsigned long *scratch;
+ struct nft_pipapo_scratch *scratch;
#ifdef NFT_PIPAPO_ALIGN
- unsigned long *scratch_aligned;
+ void *scratch_aligned;
+ u32 align_off;
#endif
-
- scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
+ scratch = kzalloc_node(struct_size(scratch, map,
+ bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL, cpu_to_node(i));
if (!scratch) {
@@ -1133,14 +1162,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return -ENOMEM;
}
- kfree(*per_cpu_ptr(clone->scratch, i));
-
- *per_cpu_ptr(clone->scratch, i) = scratch;
+ pipapo_free_scratch(clone, i);
#ifdef NFT_PIPAPO_ALIGN
- scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
- *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+ /* Align &scratch->map (not the struct itself): the extra
+ * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
+ * above guarantee we can waste up to those bytes in order
+ * to align the map field regardless of its offset within
+ * the struct.
+ */
+ BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
+
+ scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
+ scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
+ align_off = scratch_aligned - (void *)scratch;
+
+ scratch = scratch_aligned;
+ scratch->align_off = align_off;
#endif
+ *per_cpu_ptr(clone->scratch, i) = scratch;
}
return 0;
@@ -1151,21 +1191,22 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
* @net: Network namespace
* @set: nftables API set representation
* @elem: nftables API element representation containing key data
- * @ext2: Filled with pointer to &struct nft_set_ext in inserted element
+ * @elem_priv: Filled with pointer to &struct nft_elem_priv of the inserted or conflicting element
*
* Return: 0 on success, error pointer on failure.
*/
static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext2)
+ struct nft_elem_priv **elem_priv)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *start = (const u8 *)elem->key.val.data, *end;
- struct nft_pipapo_elem *e = elem->priv, *dup;
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
+ struct nft_pipapo_elem *e, *dup;
+ u64 tstamp = nft_net_tstamp(net);
struct nft_pipapo_field *f;
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
@@ -1175,7 +1216,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
- dup = pipapo_get(net, set, start, genmask);
+ dup = pipapo_get(net, set, start, genmask, tstamp);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1188,7 +1229,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
!memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
- *ext2 = &dup->ext;
+ *elem_priv = &dup->priv;
return -EEXIST;
}
@@ -1197,13 +1238,13 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net));
+ dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp);
}
if (PTR_ERR(dup) != -ENOENT) {
if (IS_ERR(dup))
return PTR_ERR(dup);
- *ext2 = &dup->ext;
+ *elem_priv = &dup->priv;
return -ENOTEMPTY;
}
@@ -1263,7 +1304,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
put_cpu_ptr(m->scratch);
}
- *ext2 = &e->ext;
+ e = nft_elem_priv_cast(elem->priv);
+ *elem_priv = &e->priv;
pipapo_map(m, rulemap, e);
@@ -1293,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new->scratch)
goto out_scratch;
-#ifdef NFT_PIPAPO_ALIGN
- new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
- if (!new->scratch_aligned)
- goto out_scratch;
-#endif
for_each_possible_cpu(i)
*per_cpu_ptr(new->scratch, i) = NULL;
@@ -1349,10 +1386,7 @@ out_lt:
}
out_scratch_realloc:
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(new->scratch, i));
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(new->scratch_aligned);
-#endif
+ pipapo_free_scratch(new, i);
out_scratch:
free_percpu(new->scratch);
kfree(new);
@@ -1540,23 +1574,19 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
struct nft_pipapo_elem *e)
{
- struct nft_set_elem elem = {
- .priv = e,
- };
-
- nft_setelem_data_deactivate(net, set, &elem);
+ nft_setelem_data_deactivate(net, set, &e->priv);
}
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
- * @_set: nftables API set representation
+ * @set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
+static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
- struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
struct nft_trans_gc *gc;
@@ -1591,7 +1621,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
/* synchronous gc never fails, there is no need to set on
* NFT_SET_ELEM_DEAD_BIT.
*/
- if (nft_set_elem_expired(&e->ext)) {
+ if (__nft_set_elem_expired(&e->ext, tstamp)) {
priv->dirty = true;
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
@@ -1637,13 +1667,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m)
int i;
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(m->scratch, i));
+ pipapo_free_scratch(m, i);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
-
pipapo_free_fields(m);
kfree(m);
@@ -1672,7 +1698,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
* We also need to create a new working copy for subsequent insertions and
* deletions.
*/
-static void nft_pipapo_commit(const struct nft_set *set)
+static void nft_pipapo_commit(struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *new_clone, *old;
@@ -1732,7 +1758,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
* nft_pipapo_activate() - Mark element reference as active given key, commit
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @elem_priv: nftables API element representation containing key data
*
* On insertion, elements are added to a copy of the matching data currently
* in use for lookups, and not directly inserted into current lookup data. Both
@@ -1741,9 +1767,9 @@ static void nft_pipapo_abort(const struct nft_set *set)
*/
static void nft_pipapo_activate(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_pipapo_elem *e = elem->priv;
+ struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &e->ext);
}
@@ -1766,7 +1792,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
{
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net));
+ e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net));
if (IS_ERR(e))
return NULL;
@@ -1783,9 +1809,9 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
*
* Return: deactivated element if found, NULL otherwise.
*/
-static void *nft_pipapo_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
@@ -1796,7 +1822,7 @@ static void *nft_pipapo_deactivate(const struct net *net,
* nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @elem_priv: nftables API element representation containing key data
*
* This is functionally the same as nft_pipapo_deactivate(), with a slightly
* different interface, and it's also called once for each element in a set
@@ -1810,13 +1836,12 @@ static void *nft_pipapo_deactivate(const struct net *net,
*
* Return: true if element was found and deactivated.
*/
-static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set,
- void *elem)
+static void nft_pipapo_flush(const struct net *net, const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
- struct nft_pipapo_elem *e = elem;
+ struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
- return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext),
- &e->ext);
+ nft_set_elem_change_active(net, set, &e->ext);
}
/**
@@ -1939,7 +1964,7 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
* nft_pipapo_remove() - Remove element given key, commit
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @elem_priv: nftables API element representation containing key data
*
* Similarly to nft_pipapo_activate(), this is used as commit operation by the
* API, but it's called once per element in the pending transaction, so we can't
@@ -1947,14 +1972,15 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
* the matched element here, if any, and commit the updated matching data.
*/
static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
- struct nft_pipapo_elem *e = elem->priv;
int rules_f0, first_rule = 0;
+ struct nft_pipapo_elem *e;
const u8 *data;
+ e = nft_elem_priv_cast(elem_priv);
data = (const u8 *)nft_set_ext_key(&e->ext);
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
@@ -2031,7 +2057,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
for (r = 0; r < f->rules; r++) {
struct nft_pipapo_elem *e;
- struct nft_set_elem elem;
if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
continue;
@@ -2044,9 +2069,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (!nft_set_elem_active(&e->ext, iter->genmask))
goto cont;
- elem.priv = e;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
goto out;
@@ -2118,6 +2141,8 @@ static int nft_pipapo_init(const struct nft_set *set,
struct nft_pipapo_field *f;
int err, i, field_count;
+ BUILD_BUG_ON(offsetof(struct nft_pipapo_elem, priv) != 0);
+
field_count = desc->field_count ? : 1;
if (field_count > NFT_PIPAPO_MAX_FIELDS)
@@ -2130,7 +2155,7 @@ static int nft_pipapo_init(const struct nft_set *set,
m->field_count = field_count;
m->bsize_max = 0;
- m->scratch = alloc_percpu(unsigned long *);
+ m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
if (!m->scratch) {
err = -ENOMEM;
goto out_scratch;
@@ -2138,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set,
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch, i) = NULL;
-#ifdef NFT_PIPAPO_ALIGN
- m->scratch_aligned = alloc_percpu(unsigned long *);
- if (!m->scratch_aligned) {
- err = -ENOMEM;
- goto out_free;
- }
- for_each_possible_cpu(i)
- *per_cpu_ptr(m->scratch_aligned, i) = NULL;
-#endif
-
rcu_head_init(&m->rcu);
nft_pipapo_for_each_field(f, i, m) {
@@ -2178,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set,
return 0;
out_free:
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2212,7 +2224,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
e = f->mt[r].e;
- nf_tables_set_elem_destroy(ctx, set, e);
+ nf_tables_set_elem_destroy(ctx, set, &e->priv);
}
}
@@ -2234,11 +2246,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(m->scratch, cpu));
+ pipapo_free_scratch(m, cpu);
free_percpu(m->scratch);
pipapo_free_fields(m);
kfree(m);
@@ -2251,11 +2260,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (priv->dirty)
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(priv->clone->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
+ pipapo_free_scratch(priv->clone, cpu);
free_percpu(priv->clone->scratch);
pipapo_free_fields(priv->clone);
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 2e164a319..f59a0cd81 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -131,20 +131,28 @@ struct nft_pipapo_field {
};
/**
+ * struct nft_pipapo_scratch - percpu data used for lookup and matching
+ * @map_index: Current working bitmap index, toggled between field matches
+ * @align_off: Offset to get the originally allocated address
+ * @map: store partial matching results during lookup
+ */
+struct nft_pipapo_scratch {
+ u8 map_index;
+ u32 align_off;
+ unsigned long map[];
+};
+
+/**
* struct nft_pipapo_match - Data used for lookup and matching
* @field_count Amount of fields in set
* @scratch: Preallocated per-CPU maps for partial matching results
- * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
* @bsize_max: Maximum lookup table bucket size of all fields, in longs
* @rcu Matching data is swapped on commits
* @f: Fields, with lookup and mapping tables
*/
struct nft_pipapo_match {
int field_count;
-#ifdef NFT_PIPAPO_ALIGN
- unsigned long * __percpu *scratch_aligned;
-#endif
- unsigned long * __percpu *scratch;
+ struct nft_pipapo_scratch * __percpu *scratch;
size_t bsize_max;
struct rcu_head rcu;
struct nft_pipapo_field f[] __counted_by(field_count);
@@ -170,10 +178,12 @@ struct nft_pipapo_elem;
/**
* struct nft_pipapo_elem - API-facing representation of single set element
+ * @priv: element placeholder
* @ext: nftables API extensions
*/
struct nft_pipapo_elem {
- struct nft_set_ext ext;
+ struct nft_elem_priv priv;
+ struct nft_set_ext ext;
};
int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 52e0d026d..a3a8ddca9 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -57,7 +57,7 @@
/* Jump to label if @reg is zero */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \
- asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
+ asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
"je %l[" #label "]" : : : : label)
/* Store 256 bits from YMM register into memory. Contrary to bucket load
@@ -71,9 +71,6 @@
#define NFT_PIPAPO_AVX2_ZERO(reg) \
asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
-
/**
* nft_pipapo_avx2_prepare() - Prepare before main algorithm body
*
@@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
- unsigned long *res, *fill, *scratch;
+ struct nft_pipapo_scratch *scratch;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
+ unsigned long *res, *fill;
bool map_index;
int i, ret = 0;
@@ -1141,15 +1139,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
*/
kernel_fpu_begin_mask(0);
- scratch = *raw_cpu_ptr(m->scratch_aligned);
+ scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch)) {
kernel_fpu_end();
return false;
}
- map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
- res = scratch + (map_index ? m->bsize_max : 0);
- fill = scratch + (map_index ? 0 : m->bsize_max);
+ map_index = scratch->map_index;
+
+ res = scratch->map + (map_index ? m->bsize_max : 0);
+ fill = scratch->map + (map_index ? 0 : m->bsize_max);
/* Starting map doesn't need to be set for this implementation */
@@ -1221,7 +1220,7 @@ next_match:
out:
if (i % 2)
- raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
+ scratch->map_index = !map_index;
kernel_fpu_end();
return ret >= 0;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index e34662f4a..9944fe479 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,10 +19,11 @@ struct nft_rbtree {
struct rb_root root;
rwlock_t lock;
seqcount_rwlock_t count;
- struct delayed_work gc_work;
+ unsigned long last_gc;
};
struct nft_rbtree_elem {
+ struct nft_elem_priv priv;
struct rb_node node;
struct nft_set_ext ext;
};
@@ -48,8 +49,7 @@ static int nft_rbtree_cmp(const struct nft_set *set,
static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
{
- return nft_set_elem_expired(&rbe->ext) ||
- nft_set_elem_is_dead(&rbe->ext);
+ return nft_set_elem_expired(&rbe->ext);
}
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -197,8 +197,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
return false;
}
-static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_rbtree *priv = nft_set_priv(set);
unsigned int seq = read_seqcount_begin(&priv->count);
@@ -209,33 +210,31 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
if (ret || !read_seqcount_retry(&priv->count, seq))
- return rbe;
+ return &rbe->priv;
read_lock_bh(&priv->lock);
seq = read_seqcount_begin(&priv->count);
ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
- if (!ret)
- rbe = ERR_PTR(-ENOENT);
read_unlock_bh(&priv->lock);
- return rbe;
+ if (!ret)
+ return ERR_PTR(-ENOENT);
+
+ return &rbe->priv;
}
-static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
- struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe)
+static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
{
- struct nft_set_elem elem = {
- .priv = rbe,
- };
-
- nft_setelem_data_deactivate(net, set, &elem);
+ lockdep_assert_held_write(&priv->lock);
+ nft_setelem_data_deactivate(net, set, &rbe->priv);
rb_erase(&rbe->node, &priv->root);
}
static const struct nft_rbtree_elem *
nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe, u8 genmask)
+ struct nft_rbtree_elem *rbe)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
@@ -254,7 +253,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
if (nft_rbtree_interval_end(rbe_prev) &&
- nft_set_elem_active(&rbe_prev->ext, genmask))
+ nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
break;
prev = rb_prev(prev);
@@ -263,7 +262,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
rbe_prev = NULL;
if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
- nft_rbtree_gc_remove(net, set, priv, rbe_prev);
+ nft_rbtree_gc_elem_remove(net, set, priv, rbe_prev);
/* There is always room in this trans gc for this element,
* memory allocation never actually happens, hence, the warning
@@ -277,7 +276,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
nft_trans_gc_elem_add(gc, rbe_prev);
}
- nft_rbtree_gc_remove(net, set, priv, rbe);
+ nft_rbtree_gc_elem_remove(net, set, priv, rbe);
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
return ERR_PTR(-ENOMEM);
@@ -307,13 +306,14 @@ static bool nft_rbtree_update_first(const struct nft_set *set,
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
- struct nft_set_ext **ext)
+ struct nft_elem_priv **elem_priv)
{
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
struct rb_node *node, *next, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
/* Descend the tree to search for an existing element greater than the
@@ -361,11 +361,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* perform garbage collection to avoid bogus overlap reports
* but skip new elements in this transaction.
*/
- if (nft_set_elem_expired(&rbe->ext) &&
+ if (__nft_set_elem_expired(&rbe->ext, tstamp) &&
nft_set_elem_active(&rbe->ext, cur_genmask)) {
const struct nft_rbtree_elem *removed_end;
- removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+ removed_end = nft_rbtree_gc_elem(set, priv, rbe);
if (IS_ERR(removed_end))
return PTR_ERR(removed_end);
@@ -424,7 +424,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/
if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
- *ext = &rbe_ge->ext;
+ *elem_priv = &rbe_ge->priv;
return -EEXIST;
}
@@ -433,7 +433,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/
if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
- *ext = &rbe_le->ext;
+ *elem_priv = &rbe_le->priv;
return -EEXIST;
}
@@ -485,10 +485,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext)
+ struct nft_elem_priv **elem_priv)
{
+ struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv);
struct nft_rbtree *priv = nft_set_priv(set);
- struct nft_rbtree_elem *rbe = elem->priv;
int err;
do {
@@ -499,7 +499,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
- err = __nft_rbtree_insert(net, set, rbe, ext);
+ err = __nft_rbtree_insert(net, set, rbe, elem_priv);
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
} while (err == -EAGAIN);
@@ -507,13 +507,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
return err;
}
-static void nft_rbtree_remove(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe)
{
- struct nft_rbtree *priv = nft_set_priv(set);
- struct nft_rbtree_elem *rbe = elem->priv;
-
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
rb_erase(&rbe->node, &priv->root);
@@ -521,33 +516,43 @@ static void nft_rbtree_remove(const struct net *net,
write_unlock_bh(&priv->lock);
}
+static void nft_rbtree_remove(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
+{
+ struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ nft_rbtree_erase(priv, rbe);
+}
+
static void nft_rbtree_activate(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_rbtree_elem *rbe = elem->priv;
+ struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &rbe->ext);
}
-static bool nft_rbtree_flush(const struct net *net,
- const struct nft_set *set, void *priv)
+static void nft_rbtree_flush(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
- struct nft_rbtree_elem *rbe = priv;
+ struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &rbe->ext);
-
- return true;
}
-static void *nft_rbtree_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
+ struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv);
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
- struct nft_rbtree_elem *rbe, *this = elem->priv;
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
while (parent != NULL) {
@@ -568,14 +573,14 @@ static void *nft_rbtree_deactivate(const struct net *net,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
- } else if (nft_set_elem_expired(&rbe->ext)) {
+ } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) {
break;
} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = parent->rb_left;
continue;
}
- nft_rbtree_flush(net, set, rbe);
- return rbe;
+ nft_rbtree_flush(net, set, &rbe->priv);
+ return &rbe->priv;
}
}
return NULL;
@@ -587,7 +592,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
- struct nft_set_elem elem;
struct rb_node *node;
read_lock_bh(&priv->lock);
@@ -599,9 +603,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
- elem.priv = rbe;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &rbe->priv);
if (iter->err < 0) {
read_unlock_bh(&priv->lock);
return;
@@ -612,45 +614,35 @@ cont:
read_unlock_bh(&priv->lock);
}
-static void nft_rbtree_gc(struct work_struct *work)
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ nft_setelem_data_deactivate(net, set, &rbe->priv);
+ nft_rbtree_erase(priv, rbe);
+}
+
+static void nft_rbtree_gc(struct nft_set *set)
{
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
- struct nftables_pernet *nft_net;
- struct nft_rbtree *priv;
+ struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
+ struct rb_node *node, *next;
struct nft_trans_gc *gc;
- struct rb_node *node;
- struct nft_set *set;
- unsigned int gc_seq;
- struct net *net;
- priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
- nft_net = nft_pernet(net);
- gc_seq = READ_ONCE(nft_net->gc_seq);
- if (nft_set_gc_is_pending(set))
- goto done;
-
- gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (!gc)
- goto done;
-
- read_lock_bh(&priv->lock);
- for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+ return;
- /* Ruleset has been updated, try later. */
- if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
- nft_trans_gc_destroy(gc);
- gc = NULL;
- goto try_later;
- }
+ for (node = rb_first(&priv->root); node ; node = next) {
+ next = rb_next(node);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (nft_set_elem_is_dead(&rbe->ext))
- goto dead_elem;
-
/* elements are reversed in the rbtree for historical reasons,
* from highest to lowest value, that is why end element is
* always visited before the start element.
@@ -659,40 +651,37 @@ static void nft_rbtree_gc(struct work_struct *work)
rbe_end = rbe;
continue;
}
- if (!nft_set_elem_expired(&rbe->ext))
+ if (!__nft_set_elem_expired(&rbe->ext, tstamp))
continue;
- nft_set_elem_dead(&rbe->ext);
-
- if (!rbe_end)
- continue;
-
- nft_set_elem_dead(&rbe_end->ext);
-
- gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
goto try_later;
- nft_trans_gc_elem_add(gc, rbe_end);
- rbe_end = NULL;
-dead_elem:
- gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ /* end element needs to be removed first, it has
+ * no timeout extension.
+ */
+ if (rbe_end) {
+ nft_rbtree_gc_remove(net, set, priv, rbe_end);
+ nft_trans_gc_elem_add(gc, rbe_end);
+ rbe_end = NULL;
+ }
+
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
goto try_later;
+ nft_rbtree_gc_remove(net, set, priv, rbe);
nft_trans_gc_elem_add(gc, rbe);
}
- gc = nft_trans_gc_catchall_async(gc, gc_seq);
-
try_later:
- read_unlock_bh(&priv->lock);
- if (gc)
- nft_trans_gc_queue_async_done(gc);
-done:
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
+ if (gc) {
+ gc = nft_trans_gc_catchall_sync(gc);
+ nft_trans_gc_queue_sync_done(gc);
+ priv->last_gc = jiffies;
+ }
}
static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
@@ -707,15 +696,12 @@ static int nft_rbtree_init(const struct nft_set *set,
{
struct nft_rbtree *priv = nft_set_priv(set);
+ BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0);
+
rwlock_init(&priv->lock);
seqcount_rwlock_init(&priv->count, &priv->lock);
priv->root = RB_ROOT;
- INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
- if (set->flags & NFT_SET_TIMEOUT)
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
-
return 0;
}
@@ -726,12 +712,10 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx,
struct nft_rbtree_elem *rbe;
struct rb_node *node;
- cancel_delayed_work_sync(&priv->gc_work);
- rcu_barrier();
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nf_tables_set_elem_destroy(ctx, set, rbe);
+ nf_tables_set_elem_destroy(ctx, set, &rbe->priv);
}
}
@@ -753,6 +737,21 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
+static void nft_rbtree_commit(struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
+ nft_rbtree_gc(set);
+}
+
+static void nft_rbtree_gc_init(const struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ priv->last_gc = jiffies;
+}
+
const struct nft_set_type nft_set_rbtree_type = {
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.ops = {
@@ -766,6 +765,8 @@ const struct nft_set_type nft_set_rbtree_type = {
.deactivate = nft_rbtree_deactivate,
.flush = nft_rbtree_flush,
.activate = nft_rbtree_activate,
+ .commit = nft_rbtree_commit,
+ .gc_init = nft_rbtree_gc_init,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.get = nft_rbtree_get,
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 9f21953c7..f735d79d8 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = {
static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
.type = NFT_OBJECT_TUNNEL,
+ .family = NFPROTO_NETDEV,
.ops = &nft_tunnel_obj_ops,
.maxattr = NFTA_TUNNEL_KEY_MAX,
.policy = nft_tunnel_key_policy,