diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
commit | ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch) | |
tree | b2d64bc10158fdd5497876388cd68142ca374ed3 /include/net/netfilter | |
parent | Initial commit. (diff) | |
download | linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip |
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'include/net/netfilter')
51 files changed, 5890 insertions, 0 deletions
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h new file mode 100644 index 0000000000..371696ec11 --- /dev/null +++ b/include/net/netfilter/br_netfilter.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BR_NETFILTER_H_ +#define _BR_NETFILTER_H_ + +#include <linux/netfilter.h> + +#include "../../../net/bridge/br_private.h" + +static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + struct nf_bridge_info *b = skb_ext_add(skb, SKB_EXT_BRIDGE_NF); + + if (b) + memset(b, 0, sizeof(*b)); + + return b; +#else + return NULL; +#endif +} + +void nf_bridge_update_protocol(struct sk_buff *skb); + +int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, + struct sk_buff *)); + +unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb); + +static inline void nf_bridge_push_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_push(skb, len); + skb->network_header -= len; +} + +int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb); + +static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? &port->br->fake_rtable : NULL; +#else + return NULL; +#endif +} + +struct net_device *setup_pre_routing(struct sk_buff *skb, + const struct net *net); + +#if IS_ENABLED(CONFIG_IPV6) +int br_validate_ipv6(struct net *net, struct sk_buff *skb); +unsigned int br_nf_pre_routing_ipv6(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); +#else +static inline int br_validate_ipv6(struct net *net, struct sk_buff *skb) +{ + return -1; +} + +static inline unsigned int +br_nf_pre_routing_ipv6(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return NF_ACCEPT; +} +#endif + +#endif /* _BR_NETFILTER_H_ */ diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h new file mode 100644 index 0000000000..2c8c2b0238 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * IPv4 support for nf_conntrack. + * + * 23 Mar 2004: Yasuyuki Kozakai @ USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - move L3 protocol dependent part from include/linux/netfilter_ipv4/ + * ip_conntarck.h + */ + +#ifndef _NF_CONNTRACK_IPV4_H +#define _NF_CONNTRACK_IPV4_H + +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; +#ifdef CONFIG_NF_CT_PROTO_DCCP +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite; +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre; +#endif + +#endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h new file mode 100644 index 0000000000..7fda9ce9f6 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_DEFRAG_IPV4_H +#define _NF_DEFRAG_IPV4_H + +struct net; +int nf_defrag_ipv4_enable(struct net *net); +void nf_defrag_ipv4_disable(struct net *net); + +#endif /* _NF_DEFRAG_IPV4_H */ diff --git a/include/net/netfilter/ipv4/nf_dup_ipv4.h b/include/net/netfilter/ipv4/nf_dup_ipv4.h new file mode 100644 index 0000000000..a2bc16cdbc --- /dev/null +++ b/include/net/netfilter/ipv4/nf_dup_ipv4.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_DUP_IPV4_H_ +#define _NF_DUP_IPV4_H_ + +#include <linux/skbuff.h> +#include <uapi/linux/in.h> + +void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, + const struct in_addr *gw, int oif); + +#endif /* _NF_DUP_IPV4_H_ */ diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h new file mode 100644 index 0000000000..c653fcb883 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _IPV4_NF_REJECT_H +#define _IPV4_NF_REJECT_H + +#include <linux/skbuff.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/netfilter/nf_reject.h> + +void nf_send_unreach(struct sk_buff *skb_in, int code, int hook); +void nf_send_reset(struct net *net, struct sock *, struct sk_buff *oldskb, + int hook); +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook); +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __u8 protocol, int ttl); +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth); + +struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code); +struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook); + + +#endif /* _IPV4_NF_REJECT_H */ diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h new file mode 100644 index 0000000000..e95483192d --- /dev/null +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_CONNTRACK_IPV6_H +#define _NF_CONNTRACK_IPV6_H + +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; + +#endif /* _NF_CONNTRACK_IPV6_H*/ diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h new file mode 100644 index 0000000000..ceadf8ba25 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_DEFRAG_IPV6_H +#define _NF_DEFRAG_IPV6_H + +#include <linux/skbuff.h> +#include <linux/types.h> + +int nf_defrag_ipv6_enable(struct net *net); +void nf_defrag_ipv6_disable(struct net *net); + +int nf_ct_frag6_init(void); +void nf_ct_frag6_cleanup(void); +int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); + +struct inet_frags_ctl; + +struct nft_ct_frag6_pernet { + struct ctl_table_header *nf_frag_frags_hdr; + struct fqdir *fqdir; +}; + +#endif /* _NF_DEFRAG_IPV6_H */ diff --git a/include/net/netfilter/ipv6/nf_dup_ipv6.h b/include/net/netfilter/ipv6/nf_dup_ipv6.h new file mode 100644 index 0000000000..f6312bb04a --- /dev/null +++ b/include/net/netfilter/ipv6/nf_dup_ipv6.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_DUP_IPV6_H_ +#define _NF_DUP_IPV6_H_ + +#include <linux/skbuff.h> + +void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, + const struct in6_addr *gw, int oif); + +#endif /* _NF_DUP_IPV6_H_ */ diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h new file mode 100644 index 0000000000..d729344ba6 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _IPV6_NF_REJECT_H +#define _IPV6_NF_REJECT_H + +#include <linux/icmpv6.h> +#include <net/netfilter/nf_reject.h> + +void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, + unsigned int hooknum); +void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, + int hook); +const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook); +struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __u8 protocol, int hoplimit); +void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen); + +struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook); +struct sk_buff *nf_reject_skb_v6_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code); + +#endif /* _IPV6_NF_REJECT_H */ diff --git a/include/net/netfilter/nf_bpf_link.h b/include/net/netfilter/nf_bpf_link.h new file mode 100644 index 0000000000..6c984b0ea8 --- /dev/null +++ b/include/net/netfilter/nf_bpf_link.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +struct bpf_nf_ctx { + const struct nf_hook_state *state; + struct sk_buff *skb; +}; + +#if IS_ENABLED(CONFIG_NETFILTER_BPF_LINK) +int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +#else +static inline int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} +#endif diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h new file mode 100644 index 0000000000..4085765c33 --- /dev/null +++ b/include/net/netfilter/nf_conntrack.h @@ -0,0 +1,380 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Connection state tracking for netfilter. This is separated from, + * but required by, the (future) NAT layer; it can also be used by an iptables + * extension. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack.h + */ + +#ifndef _NF_CONNTRACK_H +#define _NF_CONNTRACK_H + +#include <linux/bitops.h> +#include <linux/compiler.h> + +#include <linux/netfilter/nf_conntrack_common.h> +#include <linux/netfilter/nf_conntrack_tcp.h> +#include <linux/netfilter/nf_conntrack_dccp.h> +#include <linux/netfilter/nf_conntrack_sctp.h> +#include <linux/netfilter/nf_conntrack_proto_gre.h> + +#include <net/netfilter/nf_conntrack_tuple.h> + +struct nf_ct_udp { + unsigned long stream_ts; +}; + +/* per conntrack: protocol private data */ +union nf_conntrack_proto { + /* insert conntrack proto private data here */ + struct nf_ct_dccp dccp; + struct ip_ct_sctp sctp; + struct ip_ct_tcp tcp; + struct nf_ct_udp udp; + struct nf_ct_gre gre; + unsigned int tmpl_padto; +}; + +union nf_conntrack_expect_proto { + /* insert expect proto private data here */ +}; + +struct nf_conntrack_net_ecache { + struct delayed_work dwork; + spinlock_t dying_lock; + struct hlist_nulls_head dying_list; +}; + +struct nf_conntrack_net { + /* only used when new connection is allocated: */ + atomic_t count; + unsigned int expect_count; + + /* only used from work queues, configuration plane, and so on: */ + unsigned int users4; + unsigned int users6; + unsigned int users_bridge; +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_header; +#endif +#ifdef CONFIG_NF_CONNTRACK_EVENTS + struct nf_conntrack_net_ecache ecache; +#endif +}; + +#include <linux/types.h> +#include <linux/skbuff.h> + +#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> +#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> + +struct nf_conn { + /* Usage count in here is 1 for hash table, 1 per skb, + * plus 1 for any connection(s) we are `master' for + * + * Hint, SKB address this struct and refcnt via skb->_nfct and + * helpers nf_conntrack_get() and nf_conntrack_put(). + * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, + * except that the latter uses internal indirection and does not + * result in a conntrack module dependency. + * beware nf_ct_get() is different and don't inc refcnt. + */ + struct nf_conntrack ct_general; + + spinlock_t lock; + /* jiffies32 when this ct is considered dead */ + u32 timeout; + +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif + /* XXX should I move this to the tail ? - Y.K */ + /* These are my tuples; original and reply */ + struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; + + /* Have we seen traffic both ways yet? (bitset) */ + unsigned long status; + + possible_net_t ct_net; + +#if IS_ENABLED(CONFIG_NF_NAT) + struct hlist_node nat_bysource; +#endif + /* all members below initialized via memset */ + struct { } __nfct_init_offset; + + /* If we were expected by an expectation, this will be it */ + struct nf_conn *master; + +#if defined(CONFIG_NF_CONNTRACK_MARK) + u_int32_t mark; +#endif + +#ifdef CONFIG_NF_CONNTRACK_SECMARK + u_int32_t secmark; +#endif + + /* Extensions */ + struct nf_ct_ext *ext; + + /* Storage reserved for other modules, must be the last member */ + union nf_conntrack_proto proto; +}; + +static inline struct nf_conn * +nf_ct_to_nf_conn(const struct nf_conntrack *nfct) +{ + return container_of(nfct, struct nf_conn, ct_general); +} + +static inline struct nf_conn * +nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) +{ + return container_of(hash, struct nf_conn, + tuplehash[hash->tuple.dst.dir]); +} + +static inline u_int16_t nf_ct_l3num(const struct nf_conn *ct) +{ + return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; +} + +static inline u_int8_t nf_ct_protonum(const struct nf_conn *ct) +{ + return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; +} + +#define nf_ct_tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) + +/* get master conntrack via master expectation */ +#define master_ct(conntr) (conntr->master) + +extern struct net init_net; + +static inline struct net *nf_ct_net(const struct nf_conn *ct) +{ + return read_pnet(&ct->ct_net); +} + +/* Alter reply tuple (maybe alter helper). */ +void nf_conntrack_alter_reply(struct nf_conn *ct, + const struct nf_conntrack_tuple *newreply); + +/* Is this tuple taken? (ignoring any belonging to the given + conntrack). */ +int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack); + +/* Return conntrack_info and tuple hash for given skb. */ +static inline struct nf_conn * +nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) +{ + unsigned long nfct = skb_get_nfct(skb); + + *ctinfo = nfct & NFCT_INFOMASK; + return (struct nf_conn *)(nfct & NFCT_PTRMASK); +} + +void nf_ct_destroy(struct nf_conntrack *nfct); + +void nf_conntrack_tcp_set_closing(struct nf_conn *ct); + +/* decrement reference count on a conntrack */ +static inline void nf_ct_put(struct nf_conn *ct) +{ + if (ct && refcount_dec_and_test(&ct->ct_general.use)) + nf_ct_destroy(&ct->ct_general); +} + +/* load module; enable/disable conntrack in this namespace */ +int nf_ct_netns_get(struct net *net, u8 nfproto); +void nf_ct_netns_put(struct net *net, u8 nfproto); + +/* + * Allocate a hashtable of hlist_head (if nulls == 0), + * or hlist_nulls_head (if nulls == 1) + */ +void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls); + +int nf_conntrack_hash_check_insert(struct nf_conn *ct); +bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); + +bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, + u_int16_t l3num, struct net *net, + struct nf_conntrack_tuple *tuple); + +void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + u32 extra_jiffies, bool do_acct); + +/* Refresh conntrack for this many jiffies and do accounting */ +static inline void nf_ct_refresh_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + u32 extra_jiffies) +{ + __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, true); +} + +/* Refresh conntrack for this many jiffies */ +static inline void nf_ct_refresh(struct nf_conn *ct, + const struct sk_buff *skb, + u32 extra_jiffies) +{ + __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, false); +} + +/* kill conntrack and do accounting */ +bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + const struct sk_buff *skb); + +/* kill conntrack without accounting */ +static inline bool nf_ct_kill(struct nf_conn *ct) +{ + return nf_ct_delete(ct, 0, 0); +} + +struct nf_ct_iter_data { + struct net *net; + void *data; + u32 portid; + int report; +}; + +/* Iterate over all conntracks: if iter returns true, it's deleted. */ +void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data), + const struct nf_ct_iter_data *iter_data); + +/* also set unconfirmed conntracks as dying. Only use in module exit path. */ +void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), + void *data); + +struct nf_conntrack_zone; + +void nf_conntrack_free(struct nf_conn *ct); +struct nf_conn *nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_zone *zone, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl, + gfp_t gfp); + +static inline int nf_ct_is_template(const struct nf_conn *ct) +{ + return test_bit(IPS_TEMPLATE_BIT, &ct->status); +} + +/* It's confirmed if it is, or has been in the hash table. */ +static inline int nf_ct_is_confirmed(const struct nf_conn *ct) +{ + return test_bit(IPS_CONFIRMED_BIT, &ct->status); +} + +static inline int nf_ct_is_dying(const struct nf_conn *ct) +{ + return test_bit(IPS_DYING_BIT, &ct->status); +} + +/* Packet is received from loopback */ +static inline bool nf_is_loopback_packet(const struct sk_buff *skb) +{ + return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; +} + +#define nfct_time_stamp ((u32)(jiffies)) + +/* jiffies until ct expires, 0 if already expired */ +static inline unsigned long nf_ct_expires(const struct nf_conn *ct) +{ + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; + + return max(timeout, 0); +} + +static inline bool nf_ct_is_expired(const struct nf_conn *ct) +{ + return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; +} + +/* use after obtaining a reference count */ +static inline bool nf_ct_should_gc(const struct nf_conn *ct) +{ + return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && + !nf_ct_is_dying(ct); +} + +#define NF_CT_DAY (86400 * HZ) + +/* Set an arbitrary timeout large enough not to ever expire, this save + * us a check for the IPS_OFFLOAD_BIT from the packet path via + * nf_ct_is_expired(). + */ +static inline void nf_ct_offload_timeout(struct nf_conn *ct) +{ + if (nf_ct_expires(ct) < NF_CT_DAY / 2) + WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); +} + +struct kernel_param; + +int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp); +int nf_conntrack_hash_resize(unsigned int hashsize); + +extern struct hlist_nulls_head *nf_conntrack_hash; +extern unsigned int nf_conntrack_htable_size; +extern seqcount_spinlock_t nf_conntrack_generation; +extern unsigned int nf_conntrack_max; + +/* must be called with rcu read lock held */ +static inline void +nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) +{ + struct hlist_nulls_head *hptr; + unsigned int sequence, hsz; + + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hsz = nf_conntrack_htable_size; + hptr = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + *hash = hptr; + *hsize = hsz; +} + +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, + const struct nf_conntrack_zone *zone, + gfp_t flags); +void nf_ct_tmpl_free(struct nf_conn *tmpl); + +u32 nf_ct_get_id(const struct nf_conn *ct); +u32 nf_conntrack_count(const struct net *net); + +static inline void +nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) +{ + skb_set_nfct(skb, (unsigned long)ct | info); +} + +extern unsigned int nf_conntrack_net_id; + +static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net) +{ + return net_generic(net, nf_conntrack_net_id); +} + +int nf_ct_skb_network_trim(struct sk_buff *skb, int family); +int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb, + u16 zone, u8 family, u8 *proto, u16 *mru); + +#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) +#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) +#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) + +#define MODULE_ALIAS_NFCT_HELPER(helper) \ + MODULE_ALIAS("nfct-helper-" helper) + +#endif /* _NF_CONNTRACK_H */ diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h new file mode 100644 index 0000000000..a120685cac --- /dev/null +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl> + */ + +#ifndef _NF_CONNTRACK_ACCT_H +#define _NF_CONNTRACK_ACCT_H +#include <net/net_namespace.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> + +struct nf_conn_counter { + atomic64_t packets; + atomic64_t bytes; +}; + +struct nf_conn_acct { + struct nf_conn_counter counter[IP_CT_DIR_MAX]; +}; + +static inline +struct nf_conn_acct *nf_conn_acct_find(const struct nf_conn *ct) +{ + return nf_ct_ext_find(ct, NF_CT_EXT_ACCT); +} + +static inline +struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + struct net *net = nf_ct_net(ct); + struct nf_conn_acct *acct; + + if (!net->ct.sysctl_acct) + return NULL; + + acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp); + if (!acct) + pr_debug("failed to add accounting extension area"); + + + return acct; +#else + return NULL; +#endif +} + +/* Check if connection tracking accounting is enabled */ +static inline bool nf_ct_acct_enabled(struct net *net) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + return net->ct.sysctl_acct != 0; +#else + return false; +#endif +} + +/* Enable/disable connection tracking accounting */ +static inline void nf_ct_set_acct(struct net *net, bool enable) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + net->ct.sysctl_acct = enable; +#endif +} + +void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets, + unsigned int bytes); + +static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir, + unsigned int bytes) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + nf_ct_acct_add(ct, dir, 1, bytes); +#endif +} + +void nf_conntrack_acct_pernet_init(struct net *net); + +#endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h new file mode 100644 index 0000000000..e5f2f0b73a --- /dev/null +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_ACT_CT_H +#define _NF_CONNTRACK_ACT_CT_H + +#include <net/netfilter/nf_conntrack.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <net/netfilter/nf_conntrack_extend.h> + +struct nf_conn_act_ct_ext { + int ifindex[IP_CT_DIR_MAX]; +}; + +static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + return nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); +#else + return NULL; +#endif +} + +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +static inline struct +nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); + + if (act_ct) + return act_ct; + + act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); + return act_ct; +#else + return NULL; +#endif +} + +#endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h new file mode 100644 index 0000000000..2d0da478c8 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_BPF_H +#define _NF_CONNTRACK_BPF_H + +#include <linux/kconfig.h> +#include <net/netfilter/nf_conntrack.h> + +struct nf_conn___init { + struct nf_conn ct; +}; + +#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_nf_conntrack_bpf(void); +extern void cleanup_nf_conntrack_bpf(void); + +#else + +static inline int register_nf_conntrack_bpf(void) +{ + return 0; +} + +static inline void cleanup_nf_conntrack_bpf(void) +{ +} + +#endif + +#if (IS_BUILTIN(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_nf_nat_bpf(void); + +#else + +static inline int register_nf_nat_bpf(void) +{ + return 0; +} + +#endif + +#endif /* _NF_CONNTRACK_BPF_H */ diff --git a/include/net/netfilter/nf_conntrack_bridge.h b/include/net/netfilter/nf_conntrack_bridge.h new file mode 100644 index 0000000000..c564281ede --- /dev/null +++ b/include/net/netfilter/nf_conntrack_bridge.h @@ -0,0 +1,19 @@ +#ifndef NF_CONNTRACK_BRIDGE_ +#define NF_CONNTRACK_BRIDGE_ + +#include <linux/module.h> +#include <linux/types.h> +#include <uapi/linux/if_ether.h> + +struct nf_hook_ops; + +struct nf_ct_bridge_info { + struct nf_hook_ops *ops; + unsigned int ops_size; + struct module *me; +}; + +void nf_ct_bridge_register(struct nf_ct_bridge_info *info); +void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info); + +#endif diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h new file mode 100644 index 0000000000..3384859a89 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_core.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This header is used to share core functionality between the + * standalone connection tracking module, and the compatibility layer's use + * of connection tracking. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_core.h + */ + +#ifndef _NF_CONNTRACK_CORE_H +#define _NF_CONNTRACK_CORE_H + +#include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_l4proto.h> + +/* This header is used to share core functionality between the + standalone connection tracking module, and the compatibility layer's use + of connection tracking. */ + +unsigned int nf_conntrack_in(struct sk_buff *skb, + const struct nf_hook_state *state); + +int nf_conntrack_init_net(struct net *net); +void nf_conntrack_cleanup_net(struct net *net); +void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); + +void nf_conntrack_proto_pernet_init(struct net *net); + +int nf_conntrack_proto_init(void); +void nf_conntrack_proto_fini(void); + +int nf_conntrack_init_start(void); +void nf_conntrack_cleanup_start(void); + +void nf_conntrack_init_end(void); +void nf_conntrack_cleanup_end(void); + +bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig); + +/* Find a connection corresponding to a tuple. */ +struct nf_conntrack_tuple_hash * +nf_conntrack_find_get(struct net *net, + const struct nf_conntrack_zone *zone, + const struct nf_conntrack_tuple *tuple); + +int __nf_conntrack_confirm(struct sk_buff *skb); + +/* Confirm a connection: returns NF_DROP if packet must be dropped. */ +static inline int nf_conntrack_confirm(struct sk_buff *skb) +{ + struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb); + int ret = NF_ACCEPT; + + if (ct) { + if (!nf_ct_is_confirmed(ct)) { + ret = __nf_conntrack_confirm(skb); + + if (ret == NF_ACCEPT) + ct = (struct nf_conn *)skb_nfct(skb); + } + + if (ret == NF_ACCEPT && nf_ct_ecache_exist(ct)) + nf_ct_deliver_cached_events(ct); + } + return ret; +} + +unsigned int nf_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); + +void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_l4proto *proto); + +#define CONNTRACK_LOCKS 1024 + +extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; +void nf_conntrack_lock(spinlock_t *lock); + +extern spinlock_t nf_conntrack_expect_lock; + +/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ + +static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) +{ + if (timeout > INT_MAX) + timeout = INT_MAX; + + if (nf_ct_is_confirmed(ct)) + WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); + else + ct->timeout = (u32)timeout; +} + +int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout); +void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off); +int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status); + +#endif /* _NF_CONNTRACK_CORE_H */ diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h new file mode 100644 index 0000000000..e227d997fc --- /dev/null +++ b/include/net/netfilter/nf_conntrack_count.h @@ -0,0 +1,40 @@ +#ifndef _NF_CONNTRACK_COUNT_H +#define _NF_CONNTRACK_COUNT_H + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_zones.h> + +struct nf_conncount_data; + +struct nf_conncount_list { + spinlock_t list_lock; + u32 last_gc; /* jiffies at most recent gc */ + struct list_head head; /* connections with the same filtering key */ + unsigned int count; /* length of list */ +}; + +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, + unsigned int keylen); +void nf_conncount_destroy(struct net *net, unsigned int family, + struct nf_conncount_data *data); + +unsigned int nf_conncount_count(struct net *net, + struct nf_conncount_data *data, + const u32 *key, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); + +int nf_conncount_add(struct net *net, struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); + +void nf_conncount_list_init(struct nf_conncount_list *list); + +bool nf_conncount_gc_list(struct net *net, + struct nf_conncount_list *list); + +void nf_conncount_cache_free(struct nf_conncount_list *list); + +#endif diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h new file mode 100644 index 0000000000..0c1dac318e --- /dev/null +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * connection tracking event cache. + */ + +#ifndef _NF_CONNTRACK_ECACHE_H +#define _NF_CONNTRACK_ECACHE_H +#include <net/netfilter/nf_conntrack.h> + +#include <net/net_namespace.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <net/netfilter/nf_conntrack_extend.h> + +enum nf_ct_ecache_state { + NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ + NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ +}; + +struct nf_conntrack_ecache { + unsigned long cache; /* bitops want long */ + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 missed; /* missed events */ + u32 portid; /* netlink portid of destroyer */ +}; + +static inline struct nf_conntrack_ecache * +nf_ct_ecache_find(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE); +#else + return NULL; +#endif +} + +static inline bool nf_ct_ecache_exist(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + return nf_ct_ext_exist(ct, NF_CT_EXT_ECACHE); +#else + return false; +#endif +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS + +/* This structure is passed to event handler */ +struct nf_ct_event { + struct nf_conn *ct; + u32 portid; + int report; +}; + +struct nf_exp_event { + struct nf_conntrack_expect *exp; + u32 portid; + int report; +}; + +struct nf_ct_event_notifier { + int (*ct_event)(unsigned int events, const struct nf_ct_event *item); + int (*exp_event)(unsigned int events, const struct nf_exp_event *item); +}; + +void nf_conntrack_register_notifier(struct net *net, + const struct nf_ct_event_notifier *nb); +void nf_conntrack_unregister_notifier(struct net *net); + +void nf_ct_deliver_cached_events(struct nf_conn *ct); +int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, + u32 portid, int report); + +bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp); +#else + +static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) +{ +} + +static inline int nf_conntrack_eventmask_report(unsigned int eventmask, + struct nf_conn *ct, + u32 portid, + int report) +{ + return 0; +} + +static inline bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) +{ + return false; +} +#endif + +static inline void +nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *e; + + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) + return; + + e = nf_ct_ecache_find(ct); + if (e == NULL) + return; + + set_bit(event, &e->cache); +#endif +} + +static inline int +nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, + u32 portid, int report) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + if (nf_ct_ecache_exist(ct)) + return nf_conntrack_eventmask_report(1 << event, ct, portid, report); +#endif + return 0; +} + +static inline int +nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + if (nf_ct_ecache_exist(ct)) + return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); +#endif + return 0; +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp, + u32 portid, int report); + +void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state); + +void nf_conntrack_ecache_pernet_init(struct net *net); +void nf_conntrack_ecache_pernet_fini(struct net *net); + +struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net); + +static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) +{ + return net->ct.ecache_dwork_pending; +} +#else /* CONFIG_NF_CONNTRACK_EVENTS */ + +static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, + struct nf_conntrack_expect *exp, + u32 portid, + int report) +{ +} + +static inline void nf_conntrack_ecache_work(struct net *net, + enum nf_ct_ecache_state s) +{ +} + +static inline void nf_conntrack_ecache_pernet_init(struct net *net) +{ +} + +static inline void nf_conntrack_ecache_pernet_fini(struct net *net) +{ +} +static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return false; } +#endif /* CONFIG_NF_CONNTRACK_EVENTS */ +#endif /*_NF_CONNTRACK_ECACHE_H*/ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h new file mode 100644 index 0000000000..165e7a03b8 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * connection tracking expectations. + */ + +#ifndef _NF_CONNTRACK_EXPECT_H +#define _NF_CONNTRACK_EXPECT_H + +#include <linux/refcount.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_zones.h> + +extern unsigned int nf_ct_expect_hsize; +extern unsigned int nf_ct_expect_max; +extern struct hlist_head *nf_ct_expect_hash; + +struct nf_conntrack_expect { + /* Conntrack expectation list member */ + struct hlist_node lnode; + + /* Hash member */ + struct hlist_node hnode; + + /* We expect this tuple, with the following mask */ + struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple_mask mask; + + /* Usage count. */ + refcount_t use; + + /* Flags */ + unsigned int flags; + + /* Expectation class */ + unsigned int class; + + /* Function to call after setup and insertion */ + void (*expectfn)(struct nf_conn *new, + struct nf_conntrack_expect *this); + + /* Helper to assign to new connection */ + struct nf_conntrack_helper *helper; + + /* The conntrack of the master connection */ + struct nf_conn *master; + + /* Timer function; deletes the expectation. */ + struct timer_list timeout; + +#if IS_ENABLED(CONFIG_NF_NAT) + union nf_inet_addr saved_addr; + /* This is the original per-proto part, used to map the + * expected connection the way the recipient expects. */ + union nf_conntrack_man_proto saved_proto; + /* Direction relative to the master connection. */ + enum ip_conntrack_dir dir; +#endif + + struct rcu_head rcu; +}; + +static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) +{ + return nf_ct_net(exp->master); +} + +#define NF_CT_EXP_POLICY_NAME_LEN 16 + +struct nf_conntrack_expect_policy { + unsigned int max_expected; + unsigned int timeout; + char name[NF_CT_EXP_POLICY_NAME_LEN]; +}; + +#define NF_CT_EXPECT_CLASS_DEFAULT 0 +#define NF_CT_EXPECT_MAX_CNT 255 + +/* Allow to reuse expectations with the same tuples from different master + * conntracks. + */ +#define NF_CT_EXP_F_SKIP_MASTER 0x1 + +int nf_conntrack_expect_pernet_init(struct net *net); +void nf_conntrack_expect_pernet_fini(struct net *net); + +int nf_conntrack_expect_init(void); +void nf_conntrack_expect_fini(void); + +struct nf_conntrack_expect * +__nf_ct_expect_find(struct net *net, + const struct nf_conntrack_zone *zone, + const struct nf_conntrack_tuple *tuple); + +struct nf_conntrack_expect * +nf_ct_expect_find_get(struct net *net, + const struct nf_conntrack_zone *zone, + const struct nf_conntrack_tuple *tuple); + +struct nf_conntrack_expect * +nf_ct_find_expectation(struct net *net, + const struct nf_conntrack_zone *zone, + const struct nf_conntrack_tuple *tuple, bool unlink); + +void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, + u32 portid, int report); +static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) +{ + nf_ct_unlink_expect_report(exp, 0, 0); +} + +void nf_ct_remove_expectations(struct nf_conn *ct); +void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); +bool nf_ct_remove_expect(struct nf_conntrack_expect *exp); + +void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data), void *data); +void nf_ct_expect_iterate_net(struct net *net, + bool (*iter)(struct nf_conntrack_expect *e, void *data), + void *data, u32 portid, int report); + +/* Allocate space for an expectation: this is mandatory before calling + nf_ct_expect_related. You will have to call put afterwards. */ +struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me); +void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, + const union nf_inet_addr *, + const union nf_inet_addr *, + u_int8_t, const __be16 *, const __be16 *); +void nf_ct_expect_put(struct nf_conntrack_expect *exp); +int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, + u32 portid, int report, unsigned int flags); +static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect, + unsigned int flags) +{ + return nf_ct_expect_related_report(expect, 0, 0, flags); +} + +#endif /*_NF_CONNTRACK_EXPECT_H*/ + diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h new file mode 100644 index 0000000000..0b247248b0 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_CONNTRACK_EXTEND_H +#define _NF_CONNTRACK_EXTEND_H + +#include <linux/slab.h> + +#include <net/netfilter/nf_conntrack.h> + +enum nf_ct_ext_id { + NF_CT_EXT_HELPER, +#if IS_ENABLED(CONFIG_NF_NAT) + NF_CT_EXT_NAT, +#endif + NF_CT_EXT_SEQADJ, + NF_CT_EXT_ACCT, +#ifdef CONFIG_NF_CONNTRACK_EVENTS + NF_CT_EXT_ECACHE, +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + NF_CT_EXT_TSTAMP, +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + NF_CT_EXT_TIMEOUT, +#endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + NF_CT_EXT_LABELS, +#endif +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + NF_CT_EXT_SYNPROXY, +#endif +#if IS_ENABLED(CONFIG_NET_ACT_CT) + NF_CT_EXT_ACT_CT, +#endif + NF_CT_EXT_NUM, +}; + +/* Extensions: optional stuff which isn't permanently in struct. */ +struct nf_ct_ext { + u8 offset[NF_CT_EXT_NUM]; + u8 len; + unsigned int gen_id; + char data[] __aligned(8); +}; + +static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id) +{ + return !!ext->offset[id]; +} + +static inline bool nf_ct_ext_exist(const struct nf_conn *ct, u8 id) +{ + return (ct->ext && __nf_ct_ext_exist(ct->ext, id)); +} + +void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id); + +static inline void *nf_ct_ext_find(const struct nf_conn *ct, u8 id) +{ + struct nf_ct_ext *ext = ct->ext; + + if (!ext || !__nf_ct_ext_exist(ext, id)) + return NULL; + + if (unlikely(ext->gen_id)) + return __nf_ct_ext_find(ext, id); + + return (void *)ct->ext + ct->ext->offset[id]; +} + +/* Add this type, returns pointer to data or NULL. */ +void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); + +/* ext genid. if ext->id != ext_genid, extensions cannot be used + * anymore unless conntrack has CONFIRMED bit set. + */ +extern atomic_t nf_conntrack_ext_genid; +void nf_ct_ext_bump_genid(void); + +#endif /* _NF_CONNTRACK_EXTEND_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h new file mode 100644 index 0000000000..de2f956abf --- /dev/null +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * connection tracking helpers. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_helper.h + */ + +#ifndef _NF_CONNTRACK_HELPER_H +#define _NF_CONNTRACK_HELPER_H +#include <linux/refcount.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_expect.h> + +#define NF_NAT_HELPER_PREFIX "ip_nat_" +#define NF_NAT_HELPER_NAME(name) NF_NAT_HELPER_PREFIX name +#define MODULE_ALIAS_NF_NAT_HELPER(name) \ + MODULE_ALIAS(NF_NAT_HELPER_NAME(name)) + +struct module; + +enum nf_ct_helper_flags { + NF_CT_HELPER_F_USERSPACE = (1 << 0), + NF_CT_HELPER_F_CONFIGURED = (1 << 1), +}; + +#define NF_CT_HELPER_NAME_LEN 16 + +struct nf_conntrack_helper { + struct hlist_node hnode; /* Internal use. */ + + char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ + refcount_t refcnt; + struct module *me; /* pointer to self */ + const struct nf_conntrack_expect_policy *expect_policy; + + /* Tuple of things we will help (compared against server response) */ + struct nf_conntrack_tuple tuple; + + /* Function to call when data passes; return verdict, or -1 to + invalidate. */ + int (*help)(struct sk_buff *skb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info conntrackinfo); + + void (*destroy)(struct nf_conn *ct); + + int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct); + int (*to_nlattr)(struct sk_buff *skb, const struct nf_conn *ct); + unsigned int expect_class_max; + + unsigned int flags; + + /* For user-space helpers: */ + unsigned int queue_num; + /* length of userspace private data stored in nf_conn_help->data */ + u16 data_len; + /* name of NAT helper module */ + char nat_mod_name[NF_CT_HELPER_NAME_LEN]; +}; + +/* Must be kept in sync with the classes defined by helpers */ +#define NF_CT_MAX_EXPECT_CLASSES 4 + +/* nf_conn feature for connections that have a helper */ +struct nf_conn_help { + /* Helper. if any */ + struct nf_conntrack_helper __rcu *helper; + + struct hlist_head expectations; + + /* Current number of expected connections */ + u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; + + /* private helper information. */ + char data[32] __aligned(8); +}; + +#define NF_CT_HELPER_BUILD_BUG_ON(structsize) \ + BUILD_BUG_ON((structsize) > sizeof_field(struct nf_conn_help, data)) + +struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, + u16 l3num, u8 protonum); + +struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, + u16 l3num, + u8 protonum); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper); + +void nf_ct_helper_init(struct nf_conntrack_helper *helper, + u16 l3num, u16 protonum, const char *name, + u16 default_port, u16 spec_port, u32 id, + const struct nf_conntrack_expect_policy *exp_pol, + u32 expect_class_max, + int (*help)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo), + int (*from_nlattr)(struct nlattr *attr, + struct nf_conn *ct), + struct module *module); + +int nf_conntrack_helper_register(struct nf_conntrack_helper *); +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); + +int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int); +void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *, + unsigned int); + +struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); + +int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags); + +int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, u16 proto); +int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family, + u8 proto, bool nat, struct nf_conntrack_helper **hp); + +void nf_ct_helper_destroy(struct nf_conn *ct); + +static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) +{ + return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); +} + +static inline void *nfct_help_data(const struct nf_conn *ct) +{ + struct nf_conn_help *help; + + help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER); + + return (void *)help->data; +} + +int nf_conntrack_helper_init(void); +void nf_conntrack_helper_fini(void); + +int nf_conntrack_broadcast_help(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int timeout); + +struct nf_ct_helper_expectfn { + struct list_head head; + const char *name; + void (*expectfn)(struct nf_conn *ct, struct nf_conntrack_expect *exp); +}; + +__printf(3,4) +void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, + const char *fmt, ...); + +void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n); +void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n); +struct nf_ct_helper_expectfn * +nf_ct_helper_expectfn_find_by_name(const char *name); +struct nf_ct_helper_expectfn * +nf_ct_helper_expectfn_find_by_symbol(const void *symbol); + +extern struct hlist_head *nf_ct_helper_hash; +extern unsigned int nf_ct_helper_hsize; + +struct nf_conntrack_nat_helper { + struct list_head list; + char mod_name[NF_CT_HELPER_NAME_LEN]; /* module name */ + struct module *module; /* pointer to self */ +}; + +#define NF_CT_NAT_HELPER_INIT(name) \ + { \ + .mod_name = NF_NAT_HELPER_NAME(name), \ + .module = THIS_MODULE \ + } + +void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat); +void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat); +int nf_nat_helper_try_module_get(const char *name, u16 l3num, + u8 protonum); +void nf_nat_helper_put(struct nf_conntrack_helper *helper); +#endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h new file mode 100644 index 0000000000..1f47bef517 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Header for use in defining a given L4 protocol for connection tracking. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalized L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_protcol.h + */ + +#ifndef _NF_CONNTRACK_L4PROTO_H +#define _NF_CONNTRACK_L4PROTO_H +#include <linux/netlink.h> +#include <net/netlink.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netns/generic.h> + +struct seq_file; + +struct nf_conntrack_l4proto { + /* L4 Protocol number. */ + u_int8_t l4proto; + + /* Resolve clashes on insertion races. */ + bool allow_clash; + + /* protoinfo nlattr size, closes a hole */ + u16 nlattr_size; + + /* called by gc worker if table is full */ + bool (*can_early_drop)(const struct nf_conn *ct); + + /* convert protoinfo to nfnetink attributes */ + int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, + struct nf_conn *ct, bool destroy); + + /* convert nfnetlink attributes to protoinfo */ + int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct); + + int (*tuple_to_nlattr)(struct sk_buff *skb, + const struct nf_conntrack_tuple *t); + /* Calculate tuple nlattr size */ + unsigned int (*nlattr_tuple_size)(void); + int (*nlattr_to_tuple)(struct nlattr *tb[], + struct nf_conntrack_tuple *t, + u_int32_t flags); + const struct nla_policy *nla_policy; + + struct { + int (*nlattr_to_obj)(struct nlattr *tb[], + struct net *net, void *data); + int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); + + u16 obj_size; + u16 nlattr_max; + const struct nla_policy *nla_policy; + } ctnl_timeout; +#ifdef CONFIG_NF_CONNTRACK_PROCFS + /* Print out the private part of the conntrack. */ + void (*print_conntrack)(struct seq_file *s, struct nf_conn *); +#endif +}; + +bool icmp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct net *net, + struct nf_conntrack_tuple *tuple); + +bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct net *net, + struct nf_conntrack_tuple *tuple); + +bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig); +bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig); + +int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state, + u8 l4proto, + union nf_inet_addr *outer_daddr); + +int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, + struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state); + +int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, + struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state); + +int nf_conntrack_icmp_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +int nf_conntrack_icmpv6_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +int nf_conntrack_udp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_udplite_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_tcp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_dccp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_sctp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_gre_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +void nf_conntrack_generic_init_net(struct net *net); +void nf_conntrack_tcp_init_net(struct net *net); +void nf_conntrack_udp_init_net(struct net *net); +void nf_conntrack_gre_init_net(struct net *net); +void nf_conntrack_dccp_init_net(struct net *net); +void nf_conntrack_sctp_init_net(struct net *net); +void nf_conntrack_icmp_init_net(struct net *net); +void nf_conntrack_icmpv6_init_net(struct net *net); + +/* Existing built-in generic protocol */ +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; + +#define MAX_NF_CT_PROTO IPPROTO_UDPLITE + +const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto); + +/* Generic netlink helpers */ +int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple); +int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], + struct nf_conntrack_tuple *t, + u_int32_t flags); +unsigned int nf_ct_port_nlattr_tuple_size(void); +extern const struct nla_policy nf_ct_port_nla_policy[]; + +#ifdef CONFIG_SYSCTL +__printf(4, 5) __cold +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const struct nf_hook_state *state, + const char *fmt, ...); +__printf(4, 5) __cold +void nf_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_hook_state *state, + u8 protonum, + const char *fmt, ...); +#else +static inline __printf(4, 5) __cold +void nf_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_hook_state *state, + u8 protonum, + const char *fmt, ...) {} +static inline __printf(4, 5) __cold +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const struct nf_hook_state *state, + const char *fmt, ...) { } +#endif /* CONFIG_SYSCTL */ + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +static inline struct nf_generic_net *nf_generic_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.generic; +} + +static inline struct nf_tcp_net *nf_tcp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.tcp; +} + +static inline struct nf_udp_net *nf_udp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.udp; +} + +static inline struct nf_icmp_net *nf_icmp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp; +} + +static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmpv6; +} + +/* Caller must check nf_ct_protonum(ct) is IPPROTO_TCP before calling. */ +static inline void nf_ct_set_tcp_be_liberal(struct nf_conn *ct) +{ + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; +} + +/* Caller must check nf_ct_protonum(ct) is IPPROTO_TCP before calling. */ +static inline bool nf_conntrack_tcp_established(const struct nf_conn *ct) +{ + return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED && + test_bit(IPS_ASSURED_BIT, &ct->status); +} +#endif + +#ifdef CONFIG_NF_CT_PROTO_DCCP +static inline struct nf_dccp_net *nf_dccp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.dccp; +} +#endif + +#ifdef CONFIG_NF_CT_PROTO_SCTP +static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.sctp; +} +#endif + +#ifdef CONFIG_NF_CT_PROTO_GRE +static inline struct nf_gre_net *nf_gre_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.gre; +} +#endif + +#endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h new file mode 100644 index 0000000000..fcb19a4e8f --- /dev/null +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_LABELS_H +#define _NF_CONNTRACK_LABELS_H + +#include <linux/netfilter/nf_conntrack_common.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <linux/types.h> +#include <net/net_namespace.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> +#include <uapi/linux/netfilter/xt_connlabel.h> + +#define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) + +struct nf_conn_labels { + unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)]; +}; + +/* Can't use nf_ct_ext_find(), flow dissector cannot use symbols + * exported by nf_conntrack module. + */ +static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + struct nf_ct_ext *ext = ct->ext; + + if (!ext || !__nf_ct_ext_exist(ext, NF_CT_EXT_LABELS)) + return NULL; + + return (void *)ct->ext + ct->ext->offset[NF_CT_EXT_LABELS]; +#else + return NULL; +#endif +} + +static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + struct net *net = nf_ct_net(ct); + + if (net->ct.labels_used == 0) + return NULL; + + return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC); +#else + return NULL; +#endif +} + +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, const u32 *mask, unsigned int words); + +#ifdef CONFIG_NF_CONNTRACK_LABELS +int nf_connlabels_get(struct net *net, unsigned int bit); +void nf_connlabels_put(struct net *net); +#else +static inline int nf_connlabels_get(struct net *net, unsigned int bit) { return 0; } +static inline void nf_connlabels_put(struct net *net) {} +#endif + +#endif /* _NF_CONNTRACK_LABELS_H */ diff --git a/include/net/netfilter/nf_conntrack_seqadj.h b/include/net/netfilter/nf_conntrack_seqadj.h new file mode 100644 index 0000000000..883c414b76 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_seqadj.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_CONNTRACK_SEQADJ_H +#define _NF_CONNTRACK_SEQADJ_H + +#include <net/netfilter/nf_conntrack_extend.h> + +/** + * struct nf_ct_seqadj - sequence number adjustment information + * + * @correction_pos: position of the last TCP sequence number modification + * @offset_before: sequence number offset before last modification + * @offset_after: sequence number offset after last modification + */ +struct nf_ct_seqadj { + u32 correction_pos; + s32 offset_before; + s32 offset_after; +}; + +struct nf_conn_seqadj { + struct nf_ct_seqadj seq[IP_CT_DIR_MAX]; +}; + +static inline struct nf_conn_seqadj *nfct_seqadj(const struct nf_conn *ct) +{ + return nf_ct_ext_find(ct, NF_CT_EXT_SEQADJ); +} + +static inline struct nf_conn_seqadj *nfct_seqadj_ext_add(struct nf_conn *ct) +{ + return nf_ct_ext_add(ct, NF_CT_EXT_SEQADJ, GFP_ATOMIC); +} + +int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + s32 off); +int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + __be32 seq, s32 off); +void nf_ct_tcp_seqadj_set(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, s32 off); + +int nf_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, unsigned int protoff); +s32 nf_ct_seq_offset(const struct nf_conn *ct, enum ip_conntrack_dir, u32 seq); + +#endif /* _NF_CONNTRACK_SEQADJ_H */ diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h new file mode 100644 index 0000000000..6a3ab081e4 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_CONNTRACK_SYNPROXY_H +#define _NF_CONNTRACK_SYNPROXY_H + +#include <net/netfilter/nf_conntrack_seqadj.h> +#include <net/netns/generic.h> + +struct nf_conn_synproxy { + u32 isn; + u32 its; + u32 tsoff; +}; + +static inline struct nf_conn_synproxy *nfct_synproxy(const struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + return nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY); +#else + return NULL; +#endif +} + +static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + return nf_ct_ext_add(ct, NF_CT_EXT_SYNPROXY, GFP_ATOMIC); +#else + return NULL; +#endif +} + +static inline bool nf_ct_add_synproxy(struct nf_conn *ct, + const struct nf_conn *tmpl) +{ +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + if (tmpl && nfct_synproxy(tmpl)) { + if (!nfct_seqadj_ext_add(ct)) + return false; + + if (!nfct_synproxy_ext_add(ct)) + return false; + } +#endif + + return true; +} + +#endif /* _NF_CONNTRACK_SYNPROXY_H */ diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h new file mode 100644 index 0000000000..9fdaba911d --- /dev/null +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_CONNTRACK_TIMEOUT_H +#define _NF_CONNTRACK_TIMEOUT_H + +#include <net/net_namespace.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <linux/refcount.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> + +#define CTNL_TIMEOUT_NAME_MAX 32 + +struct nf_ct_timeout { + __u16 l3num; + const struct nf_conntrack_l4proto *l4proto; + char data[]; +}; + +struct nf_conn_timeout { + struct nf_ct_timeout __rcu *timeout; +}; + +static inline unsigned int * +nf_ct_timeout_data(const struct nf_conn_timeout *t) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + struct nf_ct_timeout *timeout; + + timeout = rcu_dereference(t->timeout); + if (timeout == NULL) + return NULL; + + return (unsigned int *)timeout->data; +#else + return NULL; +#endif +} + +static inline +struct nf_conn_timeout *nf_ct_timeout_find(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + return nf_ct_ext_find(ct, NF_CT_EXT_TIMEOUT); +#else + return NULL; +#endif +} + +static inline +struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct, + struct nf_ct_timeout *timeout, + gfp_t gfp) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + struct nf_conn_timeout *timeout_ext; + + timeout_ext = nf_ct_ext_add(ct, NF_CT_EXT_TIMEOUT, gfp); + if (timeout_ext == NULL) + return NULL; + + rcu_assign_pointer(timeout_ext->timeout, timeout); + + return timeout_ext; +#else + return NULL; +#endif +}; + +static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) +{ + unsigned int *timeouts = NULL; +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + struct nf_conn_timeout *timeout_ext; + + timeout_ext = nf_ct_timeout_find(ct); + if (timeout_ext) + timeouts = nf_ct_timeout_data(timeout_ext); +#endif + return timeouts; +} + +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT +void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout); +int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, + const char *timeout_name); +void nf_ct_destroy_timeout(struct nf_conn *ct); +#else +static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, + u8 l3num, u8 l4num, + const char *timeout_name) +{ + return -EOPNOTSUPP; +} + +static inline void nf_ct_destroy_timeout(struct nf_conn *ct) +{ + return; +} +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ + +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT +struct nf_ct_timeout_hooks { + struct nf_ct_timeout *(*timeout_find_get)(struct net *net, const char *name); + void (*timeout_put)(struct nf_ct_timeout *timeout); +}; + +extern const struct nf_ct_timeout_hooks __rcu *nf_ct_timeout_hook; +#endif + +#endif /* _NF_CONNTRACK_TIMEOUT_H */ diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h new file mode 100644 index 0000000000..57138d974a --- /dev/null +++ b/include/net/netfilter/nf_conntrack_timestamp.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_CONNTRACK_TSTAMP_H +#define _NF_CONNTRACK_TSTAMP_H + +#include <net/net_namespace.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> + +struct nf_conn_tstamp { + u_int64_t start; + u_int64_t stop; +}; + +static inline +struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP); +#else + return NULL; +#endif +} + +static inline +struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + struct net *net = nf_ct_net(ct); + + if (!net->ct.sysctl_tstamp) + return NULL; + + return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp); +#else + return NULL; +#endif +}; + +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP +void nf_conntrack_tstamp_pernet_init(struct net *net); +#else +static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {} +#endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */ + +#endif /* _NF_CONNTRACK_TSTAMP_H */ diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h new file mode 100644 index 0000000000..f7dd950ff2 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definitions and Declarations for tuple. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_tuple.h + */ + +#ifndef _NF_CONNTRACK_TUPLE_H +#define _NF_CONNTRACK_TUPLE_H + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <linux/list_nulls.h> + +/* A `tuple' is a structure containing the information to uniquely + identify a connection. ie. if two packets have the same tuple, they + are in the same connection; if not, they are not. + + We divide the structure along "manipulatable" and + "non-manipulatable" lines, for the benefit of the NAT code. +*/ + +#define NF_CT_TUPLE_L3SIZE ARRAY_SIZE(((union nf_inet_addr *)NULL)->all) + +/* The manipulable part of the tuple. */ +struct nf_conntrack_man { + union nf_inet_addr u3; + union nf_conntrack_man_proto u; + /* Layer 3 protocol */ + u_int16_t l3num; +}; + +/* This contains the information to distinguish a connection. */ +struct nf_conntrack_tuple { + struct nf_conntrack_man src; + + /* These are the parts of the tuple which are fixed. */ + struct { + union nf_inet_addr u3; + union { + /* Add other protocols here. */ + __be16 all; + + struct { + __be16 port; + } tcp; + struct { + __be16 port; + } udp; + struct { + u_int8_t type, code; + } icmp; + struct { + __be16 port; + } dccp; + struct { + __be16 port; + } sctp; + struct { + __be16 key; + } gre; + } u; + + /* The protocol. */ + u_int8_t protonum; + + /* The direction must be ignored for the tuplehash */ + struct { } __nfct_hash_offsetend; + + /* The direction (for tuplehash) */ + u_int8_t dir; + } dst; +}; + +struct nf_conntrack_tuple_mask { + struct { + union nf_inet_addr u3; + union nf_conntrack_man_proto u; + } src; +}; + +static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) +{ +#ifdef DEBUG + printk("tuple %p: %u %pI4:%hu -> %pI4:%hu\n", + t, t->dst.protonum, + &t->src.u3.ip, ntohs(t->src.u.all), + &t->dst.u3.ip, ntohs(t->dst.u.all)); +#endif +} + +static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t) +{ +#ifdef DEBUG + printk("tuple %p: %u %pI6 %hu -> %pI6 %hu\n", + t, t->dst.protonum, + t->src.u3.all, ntohs(t->src.u.all), + t->dst.u3.all, ntohs(t->dst.u.all)); +#endif +} + +static inline void nf_ct_dump_tuple(const struct nf_conntrack_tuple *t) +{ + switch (t->src.l3num) { + case AF_INET: + nf_ct_dump_tuple_ip(t); + break; + case AF_INET6: + nf_ct_dump_tuple_ipv6(t); + break; + } +} + +/* If we're the first tuple, it's the original dir. */ +#define NF_CT_DIRECTION(h) \ + ((enum ip_conntrack_dir)(h)->tuple.dst.dir) + +/* Connections have two entries in the hash table: one for each way */ +struct nf_conntrack_tuple_hash { + struct hlist_nulls_node hnnode; + struct nf_conntrack_tuple tuple; +}; + +static inline bool __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ + return (nf_inet_addr_cmp(&t1->src.u3, &t2->src.u3) && + t1->src.u.all == t2->src.u.all && + t1->src.l3num == t2->src.l3num); +} + +static inline bool __nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ + return (nf_inet_addr_cmp(&t1->dst.u3, &t2->dst.u3) && + t1->dst.u.all == t2->dst.u.all && + t1->dst.protonum == t2->dst.protonum); +} + +static inline bool nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ + return __nf_ct_tuple_src_equal(t1, t2) && + __nf_ct_tuple_dst_equal(t1, t2); +} + +static inline bool +nf_ct_tuple_mask_equal(const struct nf_conntrack_tuple_mask *m1, + const struct nf_conntrack_tuple_mask *m2) +{ + return (nf_inet_addr_cmp(&m1->src.u3, &m2->src.u3) && + m1->src.u.all == m2->src.u.all); +} + +static inline bool +nf_ct_tuple_src_mask_cmp(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2, + const struct nf_conntrack_tuple_mask *mask) +{ + int count; + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) { + if ((t1->src.u3.all[count] ^ t2->src.u3.all[count]) & + mask->src.u3.all[count]) + return false; + } + + if ((t1->src.u.all ^ t2->src.u.all) & mask->src.u.all) + return false; + + if (t1->src.l3num != t2->src.l3num || + t1->dst.protonum != t2->dst.protonum) + return false; + + return true; +} + +static inline bool +nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple_mask *mask) +{ + return nf_ct_tuple_src_mask_cmp(t, tuple, mask) && + __nf_ct_tuple_dst_equal(t, tuple); +} + +#endif /* _NF_CONNTRACK_TUPLE_H */ diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h new file mode 100644 index 0000000000..48dbadb96f --- /dev/null +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_CONNTRACK_ZONES_H +#define _NF_CONNTRACK_ZONES_H + +#include <linux/netfilter/nf_conntrack_zones_common.h> +#include <net/netfilter/nf_conntrack.h> + +static inline const struct nf_conntrack_zone * +nf_ct_zone(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + return &ct->zone; +#else + return &nf_ct_zone_dflt; +#endif +} + +static inline const struct nf_conntrack_zone * +nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags) +{ + zone->id = id; + zone->flags = flags; + zone->dir = dir; + + return zone; +} + +static inline const struct nf_conntrack_zone * +nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, + struct nf_conntrack_zone *tmp) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (!tmpl) + return &nf_ct_zone_dflt; + + if (tmpl->zone.flags & NF_CT_FLAG_MARK) + return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0); +#endif + return nf_ct_zone(tmpl); +} + +static inline void nf_ct_zone_add(struct nf_conn *ct, + const struct nf_conntrack_zone *zone) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + ct->zone = *zone; +#endif +} + +static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, + enum ip_conntrack_dir dir) +{ + return zone->dir & (1 << dir); +} + +static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone, + enum ip_conntrack_dir dir) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + return nf_ct_zone_matches_dir(zone, dir) ? + zone->id : NF_CT_DEFAULT_ZONE_ID; +#else + return NF_CT_DEFAULT_ZONE_ID; +#endif +} + +static inline bool nf_ct_zone_equal(const struct nf_conn *a, + const struct nf_conntrack_zone *b, + enum ip_conntrack_dir dir) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + return nf_ct_zone_id(nf_ct_zone(a), dir) == + nf_ct_zone_id(b, dir); +#else + return true; +#endif +} + +static inline bool nf_ct_zone_equal_any(const struct nf_conn *a, + const struct nf_conntrack_zone *b) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + return nf_ct_zone(a)->id == b->id; +#else + return true; +#endif +} + +#endif /* _NF_CONNTRACK_ZONES_H */ diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h new file mode 100644 index 0000000000..b175d271ae --- /dev/null +++ b/include/net/netfilter/nf_dup_netdev.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_DUP_NETDEV_H_ +#define _NF_DUP_NETDEV_H_ + +#include <net/netfilter/nf_tables.h> + +void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); +void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif); + +struct nft_offload_ctx; +struct nft_flow_rule; + +int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + enum flow_action_id id, int oif); +#endif diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h new file mode 100644 index 0000000000..692d595591 --- /dev/null +++ b/include/net/netfilter/nf_flow_table.h @@ -0,0 +1,375 @@ +#ifndef _NF_FLOW_TABLE_H +#define _NF_FLOW_TABLE_H + +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/rhashtable-types.h> +#include <linux/rcupdate.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <net/flow_offload.h> +#include <net/dst.h> +#include <linux/if_pppox.h> +#include <linux/ppp_defs.h> + +struct nf_flowtable; +struct nf_flow_rule; +struct flow_offload; +enum flow_offload_tuple_dir; + +struct nf_flow_key { + struct flow_dissector_key_meta meta; + struct flow_dissector_key_control control; + struct flow_dissector_key_control enc_control; + struct flow_dissector_key_basic basic; + struct flow_dissector_key_vlan vlan; + struct flow_dissector_key_vlan cvlan; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_keyid enc_key_id; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; + struct flow_dissector_key_tcp tcp; + struct flow_dissector_key_ports tp; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct nf_flow_match { + struct flow_dissector dissector; + struct nf_flow_key key; + struct nf_flow_key mask; +}; + +struct nf_flow_rule { + struct nf_flow_match match; + struct flow_rule *rule; +}; + +struct nf_flowtable_type { + struct list_head list; + int family; + int (*init)(struct nf_flowtable *ft); + bool (*gc)(const struct flow_offload *flow); + int (*setup)(struct nf_flowtable *ft, + struct net_device *dev, + enum flow_block_command cmd); + int (*action)(struct net *net, + struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule); + void (*free)(struct nf_flowtable *ft); + void (*get)(struct nf_flowtable *ft); + void (*put)(struct nf_flowtable *ft); + nf_hookfn *hook; + struct module *owner; +}; + +enum nf_flowtable_flags { + NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */ + NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */ +}; + +struct nf_flowtable { + struct list_head list; + struct rhashtable rhashtable; + int priority; + const struct nf_flowtable_type *type; + struct delayed_work gc_work; + unsigned int flags; + struct flow_block flow_block; + struct rw_semaphore flow_block_lock; /* Guards flow_block */ + possible_net_t net; +}; + +static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable) +{ + return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD; +} + +enum flow_offload_tuple_dir { + FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL, + FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY, +}; +#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX + +enum flow_offload_xmit_type { + FLOW_OFFLOAD_XMIT_UNSPEC = 0, + FLOW_OFFLOAD_XMIT_NEIGH, + FLOW_OFFLOAD_XMIT_XFRM, + FLOW_OFFLOAD_XMIT_DIRECT, + FLOW_OFFLOAD_XMIT_TC, +}; + +#define NF_FLOW_TABLE_ENCAP_MAX 2 + +struct flow_offload_tuple { + union { + struct in_addr src_v4; + struct in6_addr src_v6; + }; + union { + struct in_addr dst_v4; + struct in6_addr dst_v6; + }; + struct { + __be16 src_port; + __be16 dst_port; + }; + + int iifidx; + + u8 l3proto; + u8 l4proto; + struct { + u16 id; + __be16 proto; + } encap[NF_FLOW_TABLE_ENCAP_MAX]; + + /* All members above are keys for lookups, see flow_offload_hash(). */ + struct { } __hash; + + u8 dir:2, + xmit_type:3, + encap_num:2, + in_vlan_ingress:2; + u16 mtu; + union { + struct { + struct dst_entry *dst_cache; + u32 dst_cookie; + }; + struct { + u32 ifidx; + u32 hw_ifidx; + u8 h_source[ETH_ALEN]; + u8 h_dest[ETH_ALEN]; + } out; + struct { + u32 iifidx; + } tc; + }; +}; + +struct flow_offload_tuple_rhash { + struct rhash_head node; + struct flow_offload_tuple tuple; +}; + +enum nf_flow_flags { + NF_FLOW_SNAT, + NF_FLOW_DNAT, + NF_FLOW_TEARDOWN, + NF_FLOW_HW, + NF_FLOW_HW_DYING, + NF_FLOW_HW_DEAD, + NF_FLOW_HW_PENDING, + NF_FLOW_HW_BIDIRECTIONAL, + NF_FLOW_HW_ESTABLISHED, +}; + +enum flow_offload_type { + NF_FLOW_OFFLOAD_UNSPEC = 0, + NF_FLOW_OFFLOAD_ROUTE, +}; + +struct flow_offload { + struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; + struct nf_conn *ct; + unsigned long flags; + u16 type; + u32 timeout; + struct rcu_head rcu_head; +}; + +#define NF_FLOW_TIMEOUT (30 * HZ) +#define nf_flowtable_time_stamp (u32)jiffies + +unsigned long flow_offload_get_timeout(struct flow_offload *flow); + +static inline __s32 nf_flow_timeout_delta(unsigned int timeout) +{ + return (__s32)(timeout - nf_flowtable_time_stamp); +} + +struct nf_flow_route { + struct { + struct dst_entry *dst; + struct { + u32 ifindex; + struct { + u16 id; + __be16 proto; + } encap[NF_FLOW_TABLE_ENCAP_MAX]; + u8 num_encaps:2, + ingress_vlans:2; + } in; + struct { + u32 ifindex; + u32 hw_ifindex; + u8 h_source[ETH_ALEN]; + u8 h_dest[ETH_ALEN]; + } out; + enum flow_offload_xmit_type xmit_type; + } tuple[FLOW_OFFLOAD_DIR_MAX]; +}; + +struct flow_offload *flow_offload_alloc(struct nf_conn *ct); +void flow_offload_free(struct flow_offload *flow); + +static inline int +nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct flow_block *block = &flow_table->flow_block; + struct flow_block_cb *block_cb; + int err = 0; + + down_write(&flow_table->flow_block_lock); + block_cb = flow_block_cb_lookup(block, cb, cb_priv); + if (block_cb) { + err = -EEXIST; + goto unlock; + } + + block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL); + if (IS_ERR(block_cb)) { + err = PTR_ERR(block_cb); + goto unlock; + } + + list_add_tail(&block_cb->list, &block->cb_list); + up_write(&flow_table->flow_block_lock); + + if (flow_table->type->get) + flow_table->type->get(flow_table); + return 0; + +unlock: + up_write(&flow_table->flow_block_lock); + return err; +} + +static inline void +nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct flow_block *block = &flow_table->flow_block; + struct flow_block_cb *block_cb; + + down_write(&flow_table->flow_block_lock); + block_cb = flow_block_cb_lookup(block, cb, cb_priv); + if (block_cb) { + list_del(&block_cb->list); + flow_block_cb_free(block_cb); + } else { + WARN_ON(true); + } + up_write(&flow_table->flow_block_lock); + + if (flow_table->type->put) + flow_table->type->put(flow_table); +} + +void flow_offload_route_init(struct flow_offload *flow, + const struct nf_flow_route *route); + +int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); +void flow_offload_refresh(struct nf_flowtable *flow_table, + struct flow_offload *flow, bool force); + +struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, + struct flow_offload_tuple *tuple); +void nf_flow_table_gc_run(struct nf_flowtable *flow_table); +void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, + struct net_device *dev); +void nf_flow_table_cleanup(struct net_device *dev); + +int nf_flow_table_init(struct nf_flowtable *flow_table); +void nf_flow_table_free(struct nf_flowtable *flow_table); + +void flow_offload_teardown(struct flow_offload *flow); + +void nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); +void nf_flow_dnat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); + +struct flow_ports { + __be16 source, dest; +}; + +unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); +unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + +#define MODULE_ALIAS_NF_FLOWTABLE(family) \ + MODULE_ALIAS("nf-flowtable-" __stringify(family)) + +void nf_flow_offload_add(struct nf_flowtable *flowtable, + struct flow_offload *flow); +void nf_flow_offload_del(struct nf_flowtable *flowtable, + struct flow_offload *flow); +void nf_flow_offload_stats(struct nf_flowtable *flowtable, + struct flow_offload *flow); + +void nf_flow_table_offload_flush(struct nf_flowtable *flowtable); +void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable); + +int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd); +int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule); +int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule); + +int nf_flow_table_offload_init(void); +void nf_flow_table_offload_exit(void); + +static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +{ + __be16 proto; + + proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + + sizeof(struct pppoe_hdr))); + switch (proto) { + case htons(PPP_IP): + return htons(ETH_P_IP); + case htons(PPP_IPV6): + return htons(ETH_P_IPV6); + } + + return 0; +} + +#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) +#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) +#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ + this_cpu_inc((net)->ft.stat->count) +#define NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count) \ + this_cpu_dec((net)->ft.stat->count) + +#ifdef CONFIG_NF_FLOW_TABLE_PROCFS +int nf_flow_table_init_proc(struct net *net); +void nf_flow_table_fini_proc(struct net *net); +#else +static inline int nf_flow_table_init_proc(struct net *net) +{ + return 0; +} + +static inline void nf_flow_table_fini_proc(struct net *net) +{ +} +#endif /* CONFIG_NF_FLOW_TABLE_PROCFS */ + +#endif /* _NF_FLOW_TABLE_H */ diff --git a/include/net/netfilter/nf_hooks_lwtunnel.h b/include/net/netfilter/nf_hooks_lwtunnel.h new file mode 100644 index 0000000000..52e27920f8 --- /dev/null +++ b/include/net/netfilter/nf_hooks_lwtunnel.h @@ -0,0 +1,7 @@ +#include <linux/sysctl.h> +#include <linux/types.h> + +#ifdef CONFIG_SYSCTL +int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +#endif diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h new file mode 100644 index 0000000000..e55eedc84e --- /dev/null +++ b/include/net/netfilter/nf_log.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_LOG_H +#define _NF_LOG_H + +#include <linux/netfilter.h> +#include <linux/netfilter/nf_log.h> + +/* Log tcp sequence, tcp options, ip options and uid owning local socket */ +#define NF_LOG_DEFAULT_MASK 0x0f + +/* This flag indicates that copy_len field in nf_loginfo is set */ +#define NF_LOG_F_COPY_LEN 0x1 + +enum nf_log_type { + NF_LOG_TYPE_LOG = 0, + NF_LOG_TYPE_ULOG, + NF_LOG_TYPE_MAX +}; + +struct nf_loginfo { + u_int8_t type; + union { + struct { + /* copy_len will be used iff you set + * NF_LOG_F_COPY_LEN in flags + */ + u_int32_t copy_len; + u_int16_t group; + u_int16_t qthreshold; + u_int16_t flags; + } ulog; + struct { + u_int8_t level; + u_int8_t logflags; + } log; + } u; +}; + +typedef void nf_logfn(struct net *net, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *li, + const char *prefix); + +struct nf_logger { + char *name; + enum nf_log_type type; + nf_logfn *logfn; + struct module *me; +}; + +/* sysctl_nf_log_all_netns - allow LOG target in all network namespaces */ +extern int sysctl_nf_log_all_netns; + +/* Function to register/unregister log function. */ +int nf_log_register(u_int8_t pf, struct nf_logger *logger); +void nf_log_unregister(struct nf_logger *logger); + +int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger); +void nf_log_unset(struct net *net, const struct nf_logger *logger); + +int nf_log_bind_pf(struct net *net, u_int8_t pf, + const struct nf_logger *logger); +void nf_log_unbind_pf(struct net *net, u_int8_t pf); + +int nf_logger_find_get(int pf, enum nf_log_type type); +void nf_logger_put(int pf, enum nf_log_type type); + +#define MODULE_ALIAS_NF_LOGGER(family, type) \ + MODULE_ALIAS("nf-logger-" __stringify(family) "-" __stringify(type)) + +/* Calls the registered backend logging function */ +__printf(8, 9) +void nf_log_packet(struct net *net, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *li, + const char *fmt, ...); + +__printf(8, 9) +void nf_log_trace(struct net *net, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *li, + const char *fmt, ...); + +struct nf_log_buf; + +struct nf_log_buf *nf_log_buf_open(void); +__printf(2, 3) int nf_log_buf_add(struct nf_log_buf *m, const char *f, ...); +void nf_log_buf_close(struct nf_log_buf *m); +#endif /* _NF_LOG_H */ diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h new file mode 100644 index 0000000000..9877f06454 --- /dev/null +++ b/include/net/netfilter/nf_nat.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_NAT_H +#define _NF_NAT_H + +#include <linux/list.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter/nf_conntrack_pptp.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_tuple.h> +#include <uapi/linux/netfilter/nf_nat.h> + +enum nf_nat_manip_type { + NF_NAT_MANIP_SRC, + NF_NAT_MANIP_DST +}; + +/* SRC manip occurs POST_ROUTING or LOCAL_IN */ +#define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \ + (hooknum) != NF_INET_LOCAL_IN) + +/* per conntrack: nat application helper private data */ +union nf_conntrack_nat_help { + /* insert nat helper private data here */ +#if IS_ENABLED(CONFIG_NF_NAT_PPTP) + struct nf_nat_pptp nat_pptp_info; +#endif +}; + +/* The structure embedded in the conntrack structure. */ +struct nf_conn_nat { + union nf_conntrack_nat_help help; +#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE) + int masq_index; +#endif +}; + +/* Set up the info structure to map into this range. */ +unsigned int nf_nat_setup_info(struct nf_conn *ct, + const struct nf_nat_range2 *range, + enum nf_nat_manip_type maniptype); + +extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, + unsigned int hooknum); + +struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct); + +static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NF_NAT) + return nf_ct_ext_find(ct, NF_CT_EXT_NAT); +#else + return NULL; +#endif +} + +static inline bool nf_nat_oif_changed(unsigned int hooknum, + enum ip_conntrack_info ctinfo, + struct nf_conn_nat *nat, + const struct net_device *out) +{ +#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE) + return nat && nat->masq_index && hooknum == NF_INET_POST_ROUTING && + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && + nat->masq_index != out->ifindex; +#else + return false; +#endif +} + +int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, + const struct nf_hook_ops *nat_ops, unsigned int ops_count); +void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, + unsigned int ops_count); + +unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int hooknum, struct sk_buff *skb); + +unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, + enum nf_nat_manip_type mtype, + enum ip_conntrack_dir dir); +void nf_nat_csum_recalc(struct sk_buff *skb, + u8 nfproto, u8 proto, void *data, __sum16 *check, + int datalen, int oldlen); + +int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum); + +int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, unsigned int hdrlen); + +int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + +int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + +int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + +unsigned int +nf_nat_inet_fn(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + +int nf_ct_nat(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int *action, + const struct nf_nat_range2 *range, bool commit); + +static inline int nf_nat_initialized(const struct nf_conn *ct, + enum nf_nat_manip_type manip) +{ + if (manip == NF_NAT_MANIP_SRC) + return ct->status & IPS_SRC_NAT_DONE; + else + return ct->status & IPS_DST_NAT_DONE; +} +#endif diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h new file mode 100644 index 0000000000..44c421b9be --- /dev/null +++ b/include/net/netfilter/nf_nat_helper.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_NAT_HELPER_H +#define _NF_NAT_HELPER_H +/* NAT protocol helper routines. */ + +#include <linux/skbuff.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_expect.h> + +/* These return true or false. */ +bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, + unsigned int match_len, const char *rep_buffer, + unsigned int rep_len, bool adjust); + +static inline bool nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + return __nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + match_offset, match_len, + rep_buffer, rep_len, true); +} + +bool nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, + unsigned int match_len, const char *rep_buffer, + unsigned int rep_len); + +/* Setup NAT on this expected conntrack so it follows master, but goes + * to port ct->master->saved_proto. */ +void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *this); + +u16 nf_nat_exp_find_port(struct nf_conntrack_expect *exp, u16 port); +#endif diff --git a/include/net/netfilter/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h new file mode 100644 index 0000000000..be7abc9d5f --- /dev/null +++ b/include/net/netfilter/nf_nat_masquerade.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_NAT_MASQUERADE_H_ +#define _NF_NAT_MASQUERADE_H_ + +#include <linux/skbuff.h> +#include <net/netfilter/nf_nat.h> + +unsigned int +nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, + const struct nf_nat_range2 *range, + const struct net_device *out); + +int nf_nat_masquerade_inet_register_notifiers(void); +void nf_nat_masquerade_inet_unregister_notifiers(void); + +unsigned int +nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + const struct net_device *out); + +#endif /*_NF_NAT_MASQUERADE_H_ */ diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h new file mode 100644 index 0000000000..279380de90 --- /dev/null +++ b/include/net/netfilter/nf_nat_redirect.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_NAT_REDIRECT_H_ +#define _NF_NAT_REDIRECT_H_ + +#include <linux/skbuff.h> +#include <uapi/linux/netfilter/nf_nat.h> + +unsigned int +nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_range2 *range, + unsigned int hooknum); +unsigned int +nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + unsigned int hooknum); + +#endif /* _NF_NAT_REDIRECT_H_ */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h new file mode 100644 index 0000000000..c81021ab07 --- /dev/null +++ b/include/net/netfilter/nf_queue.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_QUEUE_H +#define _NF_QUEUE_H + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/jhash.h> +#include <linux/netfilter.h> +#include <linux/skbuff.h> + +/* Each queued (to userspace) skbuff has one of these. */ +struct nf_queue_entry { + struct list_head list; + struct sk_buff *skb; + unsigned int id; + unsigned int hook_index; /* index in hook_entries->hook[] */ +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + struct net_device *physin; + struct net_device *physout; +#endif + struct nf_hook_state state; + u16 size; /* sizeof(entry) + saved route keys */ + + /* extra space to store route keys */ +}; + +#define nf_queue_entry_reroute(x) ((void *)x + sizeof(struct nf_queue_entry)) + +/* Packet queuing */ +struct nf_queue_handler { + int (*outfn)(struct nf_queue_entry *entry, + unsigned int queuenum); + void (*nf_hook_drop)(struct net *net); +}; + +void nf_register_queue_handler(const struct nf_queue_handler *qh); +void nf_unregister_queue_handler(void); +void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); + +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); +void nf_queue_entry_free(struct nf_queue_entry *entry); + +static inline void init_hashrandom(u32 *jhash_initval) +{ + while (*jhash_initval == 0) + *jhash_initval = get_random_u32(); +} + +static inline u32 hash_v4(const struct iphdr *iph, u32 initval) +{ + /* packets in either direction go into same queue */ + if ((__force u32)iph->saddr < (__force u32)iph->daddr) + return jhash_3words((__force u32)iph->saddr, + (__force u32)iph->daddr, iph->protocol, initval); + + return jhash_3words((__force u32)iph->daddr, + (__force u32)iph->saddr, iph->protocol, initval); +} + +static inline u32 hash_v6(const struct ipv6hdr *ip6h, u32 initval) +{ + u32 a, b, c; + + if ((__force u32)ip6h->saddr.s6_addr32[3] < + (__force u32)ip6h->daddr.s6_addr32[3]) { + a = (__force u32) ip6h->saddr.s6_addr32[3]; + b = (__force u32) ip6h->daddr.s6_addr32[3]; + } else { + b = (__force u32) ip6h->saddr.s6_addr32[3]; + a = (__force u32) ip6h->daddr.s6_addr32[3]; + } + + if ((__force u32)ip6h->saddr.s6_addr32[1] < + (__force u32)ip6h->daddr.s6_addr32[1]) + c = (__force u32) ip6h->saddr.s6_addr32[1]; + else + c = (__force u32) ip6h->daddr.s6_addr32[1]; + + return jhash_3words(a, b, c, initval); +} + +static inline u32 hash_bridge(const struct sk_buff *skb, u32 initval) +{ + struct ipv6hdr *ip6h, _ip6h; + struct iphdr *iph, _iph; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + iph = skb_header_pointer(skb, skb_network_offset(skb), + sizeof(*iph), &_iph); + if (iph) + return hash_v4(iph, initval); + break; + case htons(ETH_P_IPV6): + ip6h = skb_header_pointer(skb, skb_network_offset(skb), + sizeof(*ip6h), &_ip6h); + if (ip6h) + return hash_v6(ip6h, initval); + break; + } + + return 0; +} + +static inline u32 +nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, + u32 initval) +{ + switch (family) { + case NFPROTO_IPV4: + queue += reciprocal_scale(hash_v4(ip_hdr(skb), initval), + queues_total); + break; + case NFPROTO_IPV6: + queue += reciprocal_scale(hash_v6(ipv6_hdr(skb), initval), + queues_total); + break; + case NFPROTO_BRIDGE: + queue += reciprocal_scale(hash_bridge(skb, initval), + queues_total); + break; + } + + return queue; +} + +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + unsigned int index, unsigned int verdict); + +#endif /* _NF_QUEUE_H */ diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h new file mode 100644 index 0000000000..7c669792fb --- /dev/null +++ b/include/net/netfilter/nf_reject.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_REJECT_H +#define _NF_REJECT_H + +#include <linux/types.h> +#include <uapi/linux/in.h> + +static inline bool nf_reject_verify_csum(struct sk_buff *skb, int dataoff, + __u8 proto) +{ + /* Skip protocols that don't use 16-bit one's complement checksum + * of the entire payload. + */ + switch (proto) { + /* Protocols with optional checksums. */ + case IPPROTO_UDP: { + const struct udphdr *udp_hdr; + struct udphdr _udp_hdr; + + udp_hdr = skb_header_pointer(skb, dataoff, + sizeof(_udp_hdr), + &_udp_hdr); + if (!udp_hdr || udp_hdr->check) + return true; + + return false; + } + case IPPROTO_GRE: + + /* Protocols with other integrity checks. */ + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_SCTP: + + /* Protocols with partial checksums. */ + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + return false; + } + return true; +} + +#endif /* _NF_REJECT_H */ diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h new file mode 100644 index 0000000000..f9d7bee9bd --- /dev/null +++ b/include/net/netfilter/nf_socket.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_SOCK_H_ +#define _NF_SOCK_H_ + +#include <net/sock.h> + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +#endif diff --git a/include/net/netfilter/nf_synproxy.h b/include/net/netfilter/nf_synproxy.h new file mode 100644 index 0000000000..a336f9434e --- /dev/null +++ b/include/net/netfilter/nf_synproxy.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_SYNPROXY_SHARED_H +#define _NF_SYNPROXY_SHARED_H + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/ip6_checksum.h> +#include <net/ip6_route.h> +#include <net/tcp.h> + +#include <net/netfilter/nf_conntrack_seqadj.h> +#include <net/netfilter/nf_conntrack_synproxy.h> + +struct synproxy_stats { + unsigned int syn_received; + unsigned int cookie_invalid; + unsigned int cookie_valid; + unsigned int cookie_retrans; + unsigned int conn_reopened; +}; + +struct synproxy_net { + struct nf_conn *tmpl; + struct synproxy_stats __percpu *stats; + unsigned int hook_ref4; + unsigned int hook_ref6; +}; + +extern unsigned int synproxy_net_id; +static inline struct synproxy_net *synproxy_pernet(struct net *net) +{ + return net_generic(net, synproxy_net_id); +} + +struct synproxy_options { + u8 options; + u8 wscale; + u16 mss_option; + u16 mss_encode; + u32 tsval; + u32 tsecr; +}; + +struct nf_synproxy_info; +bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, + const struct tcphdr *th, + struct synproxy_options *opts); + +void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info, + struct synproxy_options *opts); + +void synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, + const struct tcphdr *th, + const struct synproxy_options *opts); + +bool synproxy_recv_client_ack(struct net *net, + const struct sk_buff *skb, + const struct tcphdr *th, + struct synproxy_options *opts, u32 recv_seq); + +struct nf_hook_state; + +unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *nhs); +int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net); +void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net); + +#if IS_ENABLED(CONFIG_IPV6) +void synproxy_send_client_synack_ipv6(struct net *net, + const struct sk_buff *skb, + const struct tcphdr *th, + const struct synproxy_options *opts); + +bool synproxy_recv_client_ack_ipv6(struct net *net, const struct sk_buff *skb, + const struct tcphdr *th, + struct synproxy_options *opts, u32 recv_seq); + +unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *nhs); +int nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net); +void nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net); +#else +static inline int +nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net) { return 0; } +static inline void +nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net) {}; +#endif /* CONFIG_IPV6 */ + +#endif /* _NF_SYNPROXY_SHARED_H */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h new file mode 100644 index 0000000000..75972e211b --- /dev/null +++ b/include/net/netfilter/nf_tables.h @@ -0,0 +1,1769 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NET_NF_TABLES_H +#define _NET_NF_TABLES_H + +#include <asm/unaligned.h> +#include <linux/list.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/nf_tables.h> +#include <linux/u64_stats_sync.h> +#include <linux/rhashtable.h> +#include <net/netfilter/nf_flow_table.h> +#include <net/netlink.h> +#include <net/flow_offload.h> +#include <net/netns/generic.h> + +#define NFT_MAX_HOOKS (NF_INET_INGRESS + 1) + +struct module; + +#define NFT_JUMP_STACK_SIZE 16 + +enum { + NFT_PKTINFO_L4PROTO = (1 << 0), + NFT_PKTINFO_INNER = (1 << 1), + NFT_PKTINFO_INNER_FULL = (1 << 2), +}; + +struct nft_pktinfo { + struct sk_buff *skb; + const struct nf_hook_state *state; + u8 flags; + u8 tprot; + u16 fragoff; + u16 thoff; + u16 inneroff; +}; + +static inline struct sock *nft_sk(const struct nft_pktinfo *pkt) +{ + return pkt->state->sk; +} + +static inline unsigned int nft_thoff(const struct nft_pktinfo *pkt) +{ + return pkt->thoff; +} + +static inline struct net *nft_net(const struct nft_pktinfo *pkt) +{ + return pkt->state->net; +} + +static inline unsigned int nft_hook(const struct nft_pktinfo *pkt) +{ + return pkt->state->hook; +} + +static inline u8 nft_pf(const struct nft_pktinfo *pkt) +{ + return pkt->state->pf; +} + +static inline const struct net_device *nft_in(const struct nft_pktinfo *pkt) +{ + return pkt->state->in; +} + +static inline const struct net_device *nft_out(const struct nft_pktinfo *pkt) +{ + return pkt->state->out; +} + +static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + pkt->skb = skb; + pkt->state = state; +} + +static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt) +{ + pkt->flags = 0; + pkt->tprot = 0; + pkt->thoff = 0; + pkt->fragoff = 0; +} + +/** + * struct nft_verdict - nf_tables verdict + * + * @code: nf_tables/netfilter verdict code + * @chain: destination chain for NFT_JUMP/NFT_GOTO + */ +struct nft_verdict { + u32 code; + struct nft_chain *chain; +}; + +struct nft_data { + union { + u32 data[4]; + struct nft_verdict verdict; + }; +} __attribute__((aligned(__alignof__(u64)))); + +#define NFT_REG32_NUM 20 + +/** + * struct nft_regs - nf_tables register set + * + * @data: data registers + * @verdict: verdict register + * + * The first four data registers alias to the verdict register. + */ +struct nft_regs { + union { + u32 data[NFT_REG32_NUM]; + struct nft_verdict verdict; + }; +}; + +struct nft_regs_track { + struct { + const struct nft_expr *selector; + const struct nft_expr *bitwise; + u8 num_reg; + } regs[NFT_REG32_NUM]; + + const struct nft_expr *cur; + const struct nft_expr *last; +}; + +/* Store/load an u8, u16 or u64 integer to/from the u32 data register. + * + * Note, when using concatenations, register allocation happens at 32-bit + * level. So for store instruction, pad the rest part with zero to avoid + * garbage values. + */ + +static inline void nft_reg_store8(u32 *dreg, u8 val) +{ + *dreg = 0; + *(u8 *)dreg = val; +} + +static inline u8 nft_reg_load8(const u32 *sreg) +{ + return *(u8 *)sreg; +} + +static inline void nft_reg_store16(u32 *dreg, u16 val) +{ + *dreg = 0; + *(u16 *)dreg = val; +} + +static inline void nft_reg_store_be16(u32 *dreg, __be16 val) +{ + nft_reg_store16(dreg, (__force __u16)val); +} + +static inline u16 nft_reg_load16(const u32 *sreg) +{ + return *(u16 *)sreg; +} + +static inline __be16 nft_reg_load_be16(const u32 *sreg) +{ + return (__force __be16)nft_reg_load16(sreg); +} + +static inline __be32 nft_reg_load_be32(const u32 *sreg) +{ + return *(__force __be32 *)sreg; +} + +static inline void nft_reg_store64(u64 *dreg, u64 val) +{ + put_unaligned(val, dreg); +} + +static inline u64 nft_reg_load64(const u32 *sreg) +{ + return get_unaligned((u64 *)sreg); +} + +static inline void nft_data_copy(u32 *dst, const struct nft_data *src, + unsigned int len) +{ + if (len % NFT_REG32_SIZE) + dst[len / NFT_REG32_SIZE] = 0; + memcpy(dst, src, len); +} + +/** + * struct nft_ctx - nf_tables rule/set context + * + * @net: net namespace + * @table: the table the chain is contained in + * @chain: the chain the rule is contained in + * @nla: netlink attributes + * @portid: netlink portID of the original message + * @seq: netlink sequence number + * @family: protocol family + * @level: depth of the chains + * @report: notify via unicast netlink message + */ +struct nft_ctx { + struct net *net; + struct nft_table *table; + struct nft_chain *chain; + const struct nlattr * const *nla; + u32 portid; + u32 seq; + u16 flags; + u8 family; + u8 level; + bool report; +}; + +enum nft_data_desc_flags { + NFT_DATA_DESC_SETELEM = (1 << 0), +}; + +struct nft_data_desc { + enum nft_data_types type; + unsigned int size; + unsigned int len; + unsigned int flags; +}; + +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla); +void nft_data_hold(const struct nft_data *data, enum nft_data_types type); +void nft_data_release(const struct nft_data *data, enum nft_data_types type); +int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, + enum nft_data_types type, unsigned int len); + +static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) +{ + return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; +} + +static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) +{ + return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; +} + +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); +int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); + +int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len); +int nft_parse_register_store(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *dreg, + const struct nft_data *data, + enum nft_data_types type, unsigned int len); + +/** + * struct nft_userdata - user defined data associated with an object + * + * @len: length of the data + * @data: content + * + * The presence of user data is indicated in an object specific fashion, + * so a length of zero can't occur and the value "len" indicates data + * of length len + 1. + */ +struct nft_userdata { + u8 len; + unsigned char data[]; +}; + +/** + * struct nft_set_elem - generic representation of set elements + * + * @key: element key + * @key_end: closing element key + * @priv: element private data and extensions + */ +struct nft_set_elem { + union { + u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; + struct nft_data val; + } key; + union { + u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; + struct nft_data val; + } key_end; + union { + u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; + struct nft_data val; + } data; + void *priv; +}; + +struct nft_set; +struct nft_set_iter { + u8 genmask; + unsigned int count; + unsigned int skip; + int err; + int (*fn)(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem); +}; + +/** + * struct nft_set_desc - description of set elements + * + * @ktype: key type + * @klen: key length + * @dtype: data type + * @dlen: data length + * @objtype: object type + * @flags: flags + * @size: number of set elements + * @policy: set policy + * @gc_int: garbage collector interval + * @field_len: length of each field in concatenation, bytes + * @field_count: number of concatenated fields in element + * @expr: set must support for expressions + */ +struct nft_set_desc { + u32 ktype; + unsigned int klen; + u32 dtype; + unsigned int dlen; + u32 objtype; + unsigned int size; + u32 policy; + u32 gc_int; + u64 timeout; + u8 field_len[NFT_REG32_COUNT]; + u8 field_count; + bool expr; +}; + +/** + * enum nft_set_class - performance class + * + * @NFT_LOOKUP_O_1: constant, O(1) + * @NFT_LOOKUP_O_LOG_N: logarithmic, O(log N) + * @NFT_LOOKUP_O_N: linear, O(N) + */ +enum nft_set_class { + NFT_SET_CLASS_O_1, + NFT_SET_CLASS_O_LOG_N, + NFT_SET_CLASS_O_N, +}; + +/** + * struct nft_set_estimate - estimation of memory and performance + * characteristics + * + * @size: required memory + * @lookup: lookup performance class + * @space: memory class + */ +struct nft_set_estimate { + u64 size; + enum nft_set_class lookup; + enum nft_set_class space; +}; + +#define NFT_EXPR_MAXATTR 16 +#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \ + ALIGN(size, __alignof__(struct nft_expr))) + +/** + * struct nft_expr - nf_tables expression + * + * @ops: expression ops + * @data: expression private data + */ +struct nft_expr { + const struct nft_expr_ops *ops; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline void *nft_expr_priv(const struct nft_expr *expr) +{ + return (void *)expr->data; +} + +struct nft_expr_info; + +int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, + struct nft_expr_info *info); +int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); +void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); +int nft_expr_dump(struct sk_buff *skb, unsigned int attr, + const struct nft_expr *expr, bool reset); +bool nft_expr_reduce_bitwise(struct nft_regs_track *track, + const struct nft_expr *expr); + +struct nft_set_ext; + +/** + * struct nft_set_ops - nf_tables set operations + * + * @lookup: look up an element within the set + * @update: update an element if exists, add it if doesn't exist + * @delete: delete an element + * @insert: insert new element into set + * @activate: activate new element in the next generation + * @deactivate: lookup for element and deactivate it in the next generation + * @flush: deactivate element in the next generation + * @remove: remove element from set + * @walk: iterate over all set elements + * @get: get set elements + * @privsize: function to return size of set private data + * @init: initialize private data of new set instance + * @destroy: destroy private data of set instance + * @elemsize: element private size + * + * Operations lookup, update and delete have simpler interfaces, are faster + * and currently only used in the packet path. All the rest are slower, + * control plane functions. + */ +struct nft_set_ops { + bool (*lookup)(const struct net *net, + const struct nft_set *set, + const u32 *key, + const struct nft_set_ext **ext); + bool (*update)(struct nft_set *set, + const u32 *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_regs *), + const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_set_ext **ext); + bool (*delete)(const struct nft_set *set, + const u32 *key); + + int (*insert)(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem, + struct nft_set_ext **ext); + void (*activate)(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem); + void * (*deactivate)(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem); + bool (*flush)(const struct net *net, + const struct nft_set *set, + void *priv); + void (*remove)(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem); + void (*walk)(const struct nft_ctx *ctx, + struct nft_set *set, + struct nft_set_iter *iter); + void * (*get)(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem, + unsigned int flags); + void (*commit)(const struct nft_set *set); + void (*abort)(const struct nft_set *set); + u64 (*privsize)(const struct nlattr * const nla[], + const struct nft_set_desc *desc); + bool (*estimate)(const struct nft_set_desc *desc, + u32 features, + struct nft_set_estimate *est); + int (*init)(const struct nft_set *set, + const struct nft_set_desc *desc, + const struct nlattr * const nla[]); + void (*destroy)(const struct nft_ctx *ctx, + const struct nft_set *set); + void (*gc_init)(const struct nft_set *set); + + unsigned int elemsize; +}; + +/** + * struct nft_set_type - nf_tables set type + * + * @ops: set ops for this type + * @features: features supported by the implementation + */ +struct nft_set_type { + const struct nft_set_ops ops; + u32 features; +}; +#define to_set_type(o) container_of(o, struct nft_set_type, ops) + +struct nft_set_elem_expr { + u8 size; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +#define nft_setelem_expr_at(__elem_expr, __offset) \ + ((struct nft_expr *)&__elem_expr->data[__offset]) + +#define nft_setelem_expr_foreach(__expr, __elem_expr, __size) \ + for (__expr = nft_setelem_expr_at(__elem_expr, 0), __size = 0; \ + __size < (__elem_expr)->size; \ + __size += (__expr)->ops->size, __expr = ((void *)(__expr)) + (__expr)->ops->size) + +#define NFT_SET_EXPR_MAX 2 + +/** + * struct nft_set - nf_tables set instance + * + * @list: table set list node + * @bindings: list of set bindings + * @refs: internal refcounting for async set destruction + * @table: table this set belongs to + * @net: netnamespace this set belongs to + * @name: name of the set + * @handle: unique handle of the set + * @ktype: key type (numeric type defined by userspace, not used in the kernel) + * @dtype: data type (verdict or numeric type defined by userspace) + * @objtype: object type (see NFT_OBJECT_* definitions) + * @size: maximum set size + * @field_len: length of each field in concatenation, bytes + * @field_count: number of concatenated fields in element + * @use: number of rules references to this set + * @nelems: number of elements + * @ndeact: number of deactivated elements queued for removal + * @timeout: default timeout value in jiffies + * @gc_int: garbage collection interval in msecs + * @policy: set parameterization (see enum nft_set_policies) + * @udlen: user data length + * @udata: user data + * @expr: stateful expression + * @ops: set ops + * @flags: set flags + * @dead: set will be freed, never cleared + * @genmask: generation mask + * @klen: key length + * @dlen: data length + * @data: private set data + */ +struct nft_set { + struct list_head list; + struct list_head bindings; + refcount_t refs; + struct nft_table *table; + possible_net_t net; + char *name; + u64 handle; + u32 ktype; + u32 dtype; + u32 objtype; + u32 size; + u8 field_len[NFT_REG32_COUNT]; + u8 field_count; + u32 use; + atomic_t nelems; + u32 ndeact; + u64 timeout; + u32 gc_int; + u16 policy; + u16 udlen; + unsigned char *udata; + struct list_head pending_update; + /* runtime data below here */ + const struct nft_set_ops *ops ____cacheline_aligned; + u16 flags:13, + dead:1, + genmask:2; + u8 klen; + u8 dlen; + u8 num_exprs; + struct nft_expr *exprs[NFT_SET_EXPR_MAX]; + struct list_head catchall_list; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline bool nft_set_is_anonymous(const struct nft_set *set) +{ + return set->flags & NFT_SET_ANONYMOUS; +} + +static inline void *nft_set_priv(const struct nft_set *set) +{ + return (void *)set->data; +} + +static inline bool nft_set_gc_is_pending(const struct nft_set *s) +{ + return refcount_read(&s->refs) != 1; +} + +static inline struct nft_set *nft_set_container_of(const void *priv) +{ + return (void *)priv - offsetof(struct nft_set, data); +} + +struct nft_set *nft_set_lookup_global(const struct net *net, + const struct nft_table *table, + const struct nlattr *nla_set_name, + const struct nlattr *nla_set_id, + u8 genmask); + +struct nft_set_ext *nft_set_catchall_lookup(const struct net *net, + const struct nft_set *set); + +static inline unsigned long nft_set_gc_interval(const struct nft_set *set) +{ + u32 gc_int = READ_ONCE(set->gc_int); + + return gc_int ? msecs_to_jiffies(gc_int) : HZ; +} + +/** + * struct nft_set_binding - nf_tables set binding + * + * @list: set bindings list node + * @chain: chain containing the rule bound to the set + * @flags: set action flags + * + * A set binding contains all information necessary for validation + * of new elements added to a bound set. + */ +struct nft_set_binding { + struct list_head list; + const struct nft_chain *chain; + u32 flags; +}; + +enum nft_trans_phase; +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set); +void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, + enum nft_trans_phase phase); +int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding); +void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); + +/** + * enum nft_set_extensions - set extension type IDs + * + * @NFT_SET_EXT_KEY: element key + * @NFT_SET_EXT_KEY_END: upper bound element key, for ranges + * @NFT_SET_EXT_DATA: mapping data + * @NFT_SET_EXT_FLAGS: element flags + * @NFT_SET_EXT_TIMEOUT: element timeout + * @NFT_SET_EXT_EXPIRATION: element expiration time + * @NFT_SET_EXT_USERDATA: user data associated with the element + * @NFT_SET_EXT_EXPRESSIONS: expressions assiciated with the element + * @NFT_SET_EXT_OBJREF: stateful object reference associated with element + * @NFT_SET_EXT_NUM: number of extension types + */ +enum nft_set_extensions { + NFT_SET_EXT_KEY, + NFT_SET_EXT_KEY_END, + NFT_SET_EXT_DATA, + NFT_SET_EXT_FLAGS, + NFT_SET_EXT_TIMEOUT, + NFT_SET_EXT_EXPIRATION, + NFT_SET_EXT_USERDATA, + NFT_SET_EXT_EXPRESSIONS, + NFT_SET_EXT_OBJREF, + NFT_SET_EXT_NUM +}; + +/** + * struct nft_set_ext_type - set extension type + * + * @len: fixed part length of the extension + * @align: alignment requirements of the extension + */ +struct nft_set_ext_type { + u8 len; + u8 align; +}; + +extern const struct nft_set_ext_type nft_set_ext_types[]; + +/** + * struct nft_set_ext_tmpl - set extension template + * + * @len: length of extension area + * @offset: offsets of individual extension types + */ +struct nft_set_ext_tmpl { + u16 len; + u8 offset[NFT_SET_EXT_NUM]; + u8 ext_len[NFT_SET_EXT_NUM]; +}; + +/** + * struct nft_set_ext - set extensions + * + * @genmask: generation mask + * @offset: offsets of individual extension types + * @data: beginning of extension data + */ +struct nft_set_ext { + u8 genmask; + u8 offset[NFT_SET_EXT_NUM]; + char data[]; +}; + +static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) +{ + memset(tmpl, 0, sizeof(*tmpl)); + tmpl->len = sizeof(struct nft_set_ext); +} + +static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, + unsigned int len) +{ + tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); + if (tmpl->len > U8_MAX) + return -EINVAL; + + tmpl->offset[id] = tmpl->len; + tmpl->ext_len[id] = nft_set_ext_types[id].len + len; + tmpl->len += tmpl->ext_len[id]; + + return 0; +} + +static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) +{ + return nft_set_ext_add_length(tmpl, id, 0); +} + +static inline void nft_set_ext_init(struct nft_set_ext *ext, + const struct nft_set_ext_tmpl *tmpl) +{ + memcpy(ext->offset, tmpl->offset, sizeof(ext->offset)); +} + +static inline bool __nft_set_ext_exists(const struct nft_set_ext *ext, u8 id) +{ + return !!ext->offset[id]; +} + +static inline bool nft_set_ext_exists(const struct nft_set_ext *ext, u8 id) +{ + return ext && __nft_set_ext_exists(ext, id); +} + +static inline void *nft_set_ext(const struct nft_set_ext *ext, u8 id) +{ + return (void *)ext + ext->offset[id]; +} + +static inline struct nft_data *nft_set_ext_key(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_KEY); +} + +static inline struct nft_data *nft_set_ext_key_end(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_KEY_END); +} + +static inline struct nft_data *nft_set_ext_data(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_DATA); +} + +static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_FLAGS); +} + +static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); +} + +static inline u64 *nft_set_ext_expiration(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); +} + +static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_USERDATA); +} + +static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS); +} + +static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +{ + return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && + time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext)); +} + +static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, + void *elem) +{ + return elem + set->ops->elemsize; +} + +static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_OBJREF); +} + +struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nlattr *attr); + +void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const u32 *key, const u32 *key_end, const u32 *data, + u64 timeout, u64 expiration, gfp_t gfp); +int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_expr *expr_array[]); +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr); +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem); + +struct nft_expr_ops; +/** + * struct nft_expr_type - nf_tables expression type + * + * @select_ops: function to select nft_expr_ops + * @release_ops: release nft_expr_ops + * @ops: default ops, used when no select_ops functions is present + * @list: used internally + * @name: Identifier + * @owner: module reference + * @policy: netlink attribute policy + * @maxattr: highest netlink attribute number + * @family: address family for AF-specific types + * @flags: expression type flags + */ +struct nft_expr_type { + const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *, + const struct nlattr * const tb[]); + void (*release_ops)(const struct nft_expr_ops *ops); + const struct nft_expr_ops *ops; + const struct nft_expr_ops *inner_ops; + struct list_head list; + const char *name; + struct module *owner; + const struct nla_policy *policy; + unsigned int maxattr; + u8 family; + u8 flags; +}; + +#define NFT_EXPR_STATEFUL 0x1 +#define NFT_EXPR_GC 0x2 + +enum nft_trans_phase { + NFT_TRANS_PREPARE, + NFT_TRANS_PREPARE_ERROR, + NFT_TRANS_ABORT, + NFT_TRANS_COMMIT, + NFT_TRANS_RELEASE +}; + +struct nft_flow_rule; +struct nft_offload_ctx; + +/** + * struct nft_expr_ops - nf_tables expression operations + * + * @eval: Expression evaluation function + * @size: full expression size, including private data size + * @init: initialization function + * @activate: activate expression in the next generation + * @deactivate: deactivate expression in next generation + * @destroy: destruction function, called after synchronize_rcu + * @dump: function to dump parameters + * @type: expression type + * @validate: validate expression, called during loop detection + * @data: extra data to attach to this expression operation + */ +struct nft_expr_ops { + void (*eval)(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + int (*clone)(struct nft_expr *dst, + const struct nft_expr *src); + unsigned int size; + + int (*init)(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + void (*activate)(const struct nft_ctx *ctx, + const struct nft_expr *expr); + void (*deactivate)(const struct nft_ctx *ctx, + const struct nft_expr *expr, + enum nft_trans_phase phase); + void (*destroy)(const struct nft_ctx *ctx, + const struct nft_expr *expr); + void (*destroy_clone)(const struct nft_ctx *ctx, + const struct nft_expr *expr); + int (*dump)(struct sk_buff *skb, + const struct nft_expr *expr, + bool reset); + int (*validate)(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data); + bool (*reduce)(struct nft_regs_track *track, + const struct nft_expr *expr); + bool (*gc)(struct net *net, + const struct nft_expr *expr); + int (*offload)(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr); + bool (*offload_action)(const struct nft_expr *expr); + void (*offload_stats)(struct nft_expr *expr, + const struct flow_stats *stats); + const struct nft_expr_type *type; + void *data; +}; + +/** + * struct nft_rule - nf_tables rule + * + * @list: used internally + * @handle: rule handle + * @genmask: generation mask + * @dlen: length of expression data + * @udata: user data is appended to the rule + * @data: expression data + */ +struct nft_rule { + struct list_head list; + u64 handle:42, + genmask:2, + dlen:12, + udata:1; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) +{ + return (struct nft_expr *)&rule->data[0]; +} + +static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr) +{ + return ((void *)expr) + expr->ops->size; +} + +static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) +{ + return (struct nft_expr *)&rule->data[rule->dlen]; +} + +static inline bool nft_expr_more(const struct nft_rule *rule, + const struct nft_expr *expr) +{ + return expr != nft_expr_last(rule) && expr->ops; +} + +static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) +{ + return (void *)&rule->data[rule->dlen]; +} + +void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule); +void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule, + enum nft_trans_phase phase); +void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule); + +static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_set_elem_expr *elem_expr; + struct nft_expr *expr; + u32 size; + + if (__nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) { + elem_expr = nft_set_ext_expr(ext); + nft_setelem_expr_foreach(expr, elem_expr, size) { + expr->ops->eval(expr, regs, pkt); + if (regs->verdict.code == NFT_BREAK) + return; + } + } +} + +/* + * The last pointer isn't really necessary, but the compiler isn't able to + * determine that the result of nft_expr_last() is always the same since it + * can't assume that the dlen value wasn't changed within calls in the loop. + */ +#define nft_rule_for_each_expr(expr, last, rule) \ + for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \ + (expr) != (last); \ + (expr) = nft_expr_next(expr)) + +#define NFT_CHAIN_POLICY_UNSET U8_MAX + +struct nft_rule_dp { + u64 is_last:1, + dlen:12, + handle:42; /* for tracing */ + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +struct nft_rule_dp_last { + struct nft_rule_dp end; /* end of nft_rule_blob marker */ + struct rcu_head h; /* call_rcu head */ + struct nft_rule_blob *blob; /* ptr to free via call_rcu */ + const struct nft_chain *chain; /* for nftables tracing */ +}; + +static inline const struct nft_rule_dp *nft_rule_next(const struct nft_rule_dp *rule) +{ + return (void *)rule + sizeof(*rule) + rule->dlen; +} + +struct nft_rule_blob { + unsigned long size; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_rule_dp)))); +}; + +/** + * struct nft_chain - nf_tables chain + * + * @rules: list of rules in the chain + * @list: used internally + * @rhlhead: used internally + * @table: table that this chain belongs to + * @handle: chain handle + * @use: number of jump references to this chain + * @flags: bitmask of enum nft_chain_flags + * @name: name of the chain + */ +struct nft_chain { + struct nft_rule_blob __rcu *blob_gen_0; + struct nft_rule_blob __rcu *blob_gen_1; + struct list_head rules; + struct list_head list; + struct rhlist_head rhlhead; + struct nft_table *table; + u64 handle; + u32 use; + u8 flags:5, + bound:1, + genmask:2; + char *name; + u16 udlen; + u8 *udata; + + /* Only used during control plane commit phase: */ + struct nft_rule_blob *blob_next; +}; + +int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); +int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem); +int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); +int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); +void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); + +enum nft_chain_types { + NFT_CHAIN_T_DEFAULT = 0, + NFT_CHAIN_T_ROUTE, + NFT_CHAIN_T_NAT, + NFT_CHAIN_T_MAX +}; + +/** + * struct nft_chain_type - nf_tables chain type info + * + * @name: name of the type + * @type: numeric identifier + * @family: address family + * @owner: module owner + * @hook_mask: mask of valid hooks + * @hooks: array of hook functions + * @ops_register: base chain register function + * @ops_unregister: base chain unregister function + */ +struct nft_chain_type { + const char *name; + enum nft_chain_types type; + int family; + struct module *owner; + unsigned int hook_mask; + nf_hookfn *hooks[NFT_MAX_HOOKS]; + int (*ops_register)(struct net *net, const struct nf_hook_ops *ops); + void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops); +}; + +int nft_chain_validate_dependency(const struct nft_chain *chain, + enum nft_chain_types type); +int nft_chain_validate_hooks(const struct nft_chain *chain, + unsigned int hook_flags); + +static inline bool nft_chain_binding(const struct nft_chain *chain) +{ + return chain->flags & NFT_CHAIN_BINDING; +} + +static inline bool nft_chain_is_bound(struct nft_chain *chain) +{ + return (chain->flags & NFT_CHAIN_BINDING) && chain->bound; +} + +int nft_chain_add(struct nft_table *table, struct nft_chain *chain); +void nft_chain_del(struct nft_chain *chain); +void nf_tables_chain_destroy(struct nft_ctx *ctx); + +struct nft_stats { + u64 bytes; + u64 pkts; + struct u64_stats_sync syncp; +}; + +struct nft_hook { + struct list_head list; + struct nf_hook_ops ops; + struct rcu_head rcu; +}; + +/** + * struct nft_base_chain - nf_tables base chain + * + * @ops: netfilter hook ops + * @hook_list: list of netfilter hooks (for NFPROTO_NETDEV family) + * @type: chain type + * @policy: default policy + * @stats: per-cpu chain stats + * @chain: the chain + * @flow_block: flow block (for hardware offload) + */ +struct nft_base_chain { + struct nf_hook_ops ops; + struct list_head hook_list; + const struct nft_chain_type *type; + u8 policy; + u8 flags; + struct nft_stats __percpu *stats; + struct nft_chain chain; + struct flow_block flow_block; +}; + +static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) +{ + return container_of(chain, struct nft_base_chain, chain); +} + +static inline bool nft_is_base_chain(const struct nft_chain *chain) +{ + return chain->flags & NFT_CHAIN_BASE; +} + +int __nft_release_basechain(struct nft_ctx *ctx); + +unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); + +static inline bool nft_use_inc(u32 *use) +{ + if (*use == UINT_MAX) + return false; + + (*use)++; + + return true; +} + +static inline void nft_use_dec(u32 *use) +{ + WARN_ON_ONCE((*use)-- == 0); +} + +/* For error and abort path: restore use counter to previous state. */ +static inline void nft_use_inc_restore(u32 *use) +{ + WARN_ON_ONCE(!nft_use_inc(use)); +} + +#define nft_use_dec_restore nft_use_dec + +/** + * struct nft_table - nf_tables table + * + * @list: used internally + * @chains_ht: chains in the table + * @chains: same, for stable walks + * @sets: sets in the table + * @objects: stateful objects in the table + * @flowtables: flow tables in the table + * @hgenerator: handle generator state + * @handle: table handle + * @use: number of chain references to this table + * @flags: table flag (see enum nft_table_flags) + * @genmask: generation mask + * @afinfo: address family info + * @name: name of the table + * @validate_state: internal, set when transaction adds jumps + */ +struct nft_table { + struct list_head list; + struct rhltable chains_ht; + struct list_head chains; + struct list_head sets; + struct list_head objects; + struct list_head flowtables; + u64 hgenerator; + u64 handle; + u32 use; + u16 family:6, + flags:8, + genmask:2; + u32 nlpid; + char *name; + u16 udlen; + u8 *udata; + u8 validate_state; +}; + +static inline bool nft_table_has_owner(const struct nft_table *table) +{ + return table->flags & NFT_TABLE_F_OWNER; +} + +static inline bool nft_base_chain_netdev(int family, u32 hooknum) +{ + return family == NFPROTO_NETDEV || + (family == NFPROTO_INET && hooknum == NF_INET_INGRESS); +} + +void nft_register_chain_type(const struct nft_chain_type *); +void nft_unregister_chain_type(const struct nft_chain_type *); + +int nft_register_expr(struct nft_expr_type *); +void nft_unregister_expr(struct nft_expr_type *); + +int nft_verdict_dump(struct sk_buff *skb, int type, + const struct nft_verdict *v); + +/** + * struct nft_object_hash_key - key to lookup nft_object + * + * @name: name of the stateful object to look up + * @table: table the object belongs to + */ +struct nft_object_hash_key { + const char *name; + const struct nft_table *table; +}; + +/** + * struct nft_object - nf_tables stateful object + * + * @list: table stateful object list node + * @key: keys that identify this object + * @rhlhead: nft_objname_ht node + * @genmask: generation mask + * @use: number of references to this stateful object + * @handle: unique object handle + * @ops: object operations + * @data: object data, layout depends on type + */ +struct nft_object { + struct list_head list; + struct rhlist_head rhlhead; + struct nft_object_hash_key key; + u32 genmask:2; + u32 use; + u64 handle; + u16 udlen; + u8 *udata; + /* runtime data below here */ + const struct nft_object_ops *ops ____cacheline_aligned; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline void *nft_obj_data(const struct nft_object *obj) +{ + return (void *)obj->data; +} + +#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) + +struct nft_object *nft_obj_lookup(const struct net *net, + const struct nft_table *table, + const struct nlattr *nla, u32 objtype, + u8 genmask); + +void nft_obj_notify(struct net *net, const struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, + int event, u16 flags, int family, int report, gfp_t gfp); + +/** + * struct nft_object_type - stateful object type + * + * @select_ops: function to select nft_object_ops + * @ops: default ops, used when no select_ops functions is present + * @list: list node in list of object types + * @type: stateful object numeric type + * @owner: module owner + * @maxattr: maximum netlink attribute + * @policy: netlink attribute policy + */ +struct nft_object_type { + const struct nft_object_ops *(*select_ops)(const struct nft_ctx *, + const struct nlattr * const tb[]); + const struct nft_object_ops *ops; + struct list_head list; + u32 type; + unsigned int maxattr; + struct module *owner; + const struct nla_policy *policy; +}; + +/** + * struct nft_object_ops - stateful object operations + * + * @eval: stateful object evaluation function + * @size: stateful object size + * @init: initialize object from netlink attributes + * @destroy: release existing stateful object + * @dump: netlink dump stateful object + * @update: update stateful object + */ +struct nft_object_ops { + void (*eval)(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + unsigned int size; + int (*init)(const struct nft_ctx *ctx, + const struct nlattr *const tb[], + struct nft_object *obj); + void (*destroy)(const struct nft_ctx *ctx, + struct nft_object *obj); + int (*dump)(struct sk_buff *skb, + struct nft_object *obj, + bool reset); + void (*update)(struct nft_object *obj, + struct nft_object *newobj); + const struct nft_object_type *type; +}; + +int nft_register_obj(struct nft_object_type *obj_type); +void nft_unregister_obj(struct nft_object_type *obj_type); + +#define NFT_NETDEVICE_MAX 256 + +/** + * struct nft_flowtable - nf_tables flow table + * + * @list: flow table list node in table list + * @table: the table the flow table is contained in + * @name: name of this flow table + * @hooknum: hook number + * @ops_len: number of hooks in array + * @genmask: generation mask + * @use: number of references to this flow table + * @handle: unique object handle + * @dev_name: array of device names + * @data: rhashtable and garbage collector + * @ops: array of hooks + */ +struct nft_flowtable { + struct list_head list; + struct nft_table *table; + char *name; + int hooknum; + int ops_len; + u32 genmask:2; + u32 use; + u64 handle; + /* runtime data below here */ + struct list_head hook_list ____cacheline_aligned; + struct nf_flowtable data; +}; + +struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table, + const struct nlattr *nla, + u8 genmask); + +void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, + struct nft_flowtable *flowtable, + enum nft_trans_phase phase); + +void nft_register_flowtable_type(struct nf_flowtable_type *type); +void nft_unregister_flowtable_type(struct nf_flowtable_type *type); + +/** + * struct nft_traceinfo - nft tracing information and state + * + * @trace: other struct members are initialised + * @nf_trace: copy of skb->nf_trace before rule evaluation + * @type: event type (enum nft_trace_types) + * @skbid: hash of skb to be used as trace id + * @packet_dumped: packet headers sent in a previous traceinfo message + * @basechain: base chain currently processed + */ +struct nft_traceinfo { + bool trace; + bool nf_trace; + bool packet_dumped; + enum nft_trace_types type:8; + u32 skbid; + const struct nft_base_chain *basechain; +}; + +void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, + const struct nft_chain *basechain); + +void nft_trace_notify(const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_rule_dp *rule, + struct nft_traceinfo *info); + +#define MODULE_ALIAS_NFT_CHAIN(family, name) \ + MODULE_ALIAS("nft-chain-" __stringify(family) "-" name) + +#define MODULE_ALIAS_NFT_AF_EXPR(family, name) \ + MODULE_ALIAS("nft-expr-" __stringify(family) "-" name) + +#define MODULE_ALIAS_NFT_EXPR(name) \ + MODULE_ALIAS("nft-expr-" name) + +#define MODULE_ALIAS_NFT_OBJ(type) \ + MODULE_ALIAS("nft-obj-" __stringify(type)) + +#if IS_ENABLED(CONFIG_NF_TABLES) + +/* + * The gencursor defines two generations, the currently active and the + * next one. Objects contain a bitmask of 2 bits specifying the generations + * they're active in. A set bit means they're inactive in the generation + * represented by that bit. + * + * New objects start out as inactive in the current and active in the + * next generation. When committing the ruleset the bitmask is cleared, + * meaning they're active in all generations. When removing an object, + * it is set inactive in the next generation. After committing the ruleset, + * the objects are removed. + */ +static inline unsigned int nft_gencursor_next(const struct net *net) +{ + return net->nft.gencursor + 1 == 1 ? 1 : 0; +} + +static inline u8 nft_genmask_next(const struct net *net) +{ + return 1 << nft_gencursor_next(net); +} + +static inline u8 nft_genmask_cur(const struct net *net) +{ + /* Use READ_ONCE() to prevent refetching the value for atomicity */ + return 1 << READ_ONCE(net->nft.gencursor); +} + +#define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) + +/* + * Generic transaction helpers + */ + +/* Check if this object is currently active. */ +#define nft_is_active(__net, __obj) \ + (((__obj)->genmask & nft_genmask_cur(__net)) == 0) + +/* Check if this object is active in the next generation. */ +#define nft_is_active_next(__net, __obj) \ + (((__obj)->genmask & nft_genmask_next(__net)) == 0) + +/* This object becomes active in the next generation. */ +#define nft_activate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_cur(__net) + +/* This object becomes inactive in the next generation. */ +#define nft_deactivate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_next(__net) + +/* After committing the ruleset, clear the stale generation bit. */ +#define nft_clear(__net, __obj) \ + (__obj)->genmask &= ~nft_genmask_next(__net) +#define nft_active_genmask(__obj, __genmask) \ + !((__obj)->genmask & __genmask) + +/* + * Set element transaction helpers + */ + +static inline bool nft_set_elem_active(const struct nft_set_ext *ext, + u8 genmask) +{ + return !(ext->genmask & genmask); +} + +static inline void nft_set_elem_change_active(const struct net *net, + const struct nft_set *set, + struct nft_set_ext *ext) +{ + ext->genmask ^= nft_genmask_next(net); +} + +#endif /* IS_ENABLED(CONFIG_NF_TABLES) */ + +#define NFT_SET_ELEM_DEAD_MASK (1 << 2) + +#if defined(__LITTLE_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_DEAD_BIT 2 +#elif defined(__BIG_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) +#else +#error +#endif + +static inline void nft_set_elem_dead(struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + set_bit(NFT_SET_ELEM_DEAD_BIT, word); +} + +static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + return test_bit(NFT_SET_ELEM_DEAD_BIT, word); +} + +/** + * struct nft_trans - nf_tables object update in transaction + * + * @list: used internally + * @binding_list: list of objects with possible bindings + * @msg_type: message type + * @put_net: ctx->net needs to be put + * @ctx: transaction context + * @data: internal information related to the transaction + */ +struct nft_trans { + struct list_head list; + struct list_head binding_list; + int msg_type; + bool put_net; + struct nft_ctx ctx; + char data[]; +}; + +struct nft_trans_rule { + struct nft_rule *rule; + struct nft_flow_rule *flow; + u32 rule_id; + bool bound; +}; + +#define nft_trans_rule(trans) \ + (((struct nft_trans_rule *)trans->data)->rule) +#define nft_trans_flow_rule(trans) \ + (((struct nft_trans_rule *)trans->data)->flow) +#define nft_trans_rule_id(trans) \ + (((struct nft_trans_rule *)trans->data)->rule_id) +#define nft_trans_rule_bound(trans) \ + (((struct nft_trans_rule *)trans->data)->bound) + +struct nft_trans_set { + struct nft_set *set; + u32 set_id; + u32 gc_int; + u64 timeout; + bool update; + bool bound; + u32 size; +}; + +#define nft_trans_set(trans) \ + (((struct nft_trans_set *)trans->data)->set) +#define nft_trans_set_id(trans) \ + (((struct nft_trans_set *)trans->data)->set_id) +#define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound) +#define nft_trans_set_update(trans) \ + (((struct nft_trans_set *)trans->data)->update) +#define nft_trans_set_timeout(trans) \ + (((struct nft_trans_set *)trans->data)->timeout) +#define nft_trans_set_gc_int(trans) \ + (((struct nft_trans_set *)trans->data)->gc_int) +#define nft_trans_set_size(trans) \ + (((struct nft_trans_set *)trans->data)->size) + +struct nft_trans_chain { + struct nft_chain *chain; + bool update; + char *name; + struct nft_stats __percpu *stats; + u8 policy; + bool bound; + u32 chain_id; + struct nft_base_chain *basechain; + struct list_head hook_list; +}; + +#define nft_trans_chain(trans) \ + (((struct nft_trans_chain *)trans->data)->chain) +#define nft_trans_chain_update(trans) \ + (((struct nft_trans_chain *)trans->data)->update) +#define nft_trans_chain_name(trans) \ + (((struct nft_trans_chain *)trans->data)->name) +#define nft_trans_chain_stats(trans) \ + (((struct nft_trans_chain *)trans->data)->stats) +#define nft_trans_chain_policy(trans) \ + (((struct nft_trans_chain *)trans->data)->policy) +#define nft_trans_chain_bound(trans) \ + (((struct nft_trans_chain *)trans->data)->bound) +#define nft_trans_chain_id(trans) \ + (((struct nft_trans_chain *)trans->data)->chain_id) +#define nft_trans_basechain(trans) \ + (((struct nft_trans_chain *)trans->data)->basechain) +#define nft_trans_chain_hooks(trans) \ + (((struct nft_trans_chain *)trans->data)->hook_list) + +struct nft_trans_table { + bool update; +}; + +#define nft_trans_table_update(trans) \ + (((struct nft_trans_table *)trans->data)->update) + +struct nft_trans_elem { + struct nft_set *set; + struct nft_set_elem elem; + bool bound; +}; + +#define nft_trans_elem_set(trans) \ + (((struct nft_trans_elem *)trans->data)->set) +#define nft_trans_elem(trans) \ + (((struct nft_trans_elem *)trans->data)->elem) +#define nft_trans_elem_set_bound(trans) \ + (((struct nft_trans_elem *)trans->data)->bound) + +struct nft_trans_obj { + struct nft_object *obj; + struct nft_object *newobj; + bool update; +}; + +#define nft_trans_obj(trans) \ + (((struct nft_trans_obj *)trans->data)->obj) +#define nft_trans_obj_newobj(trans) \ + (((struct nft_trans_obj *)trans->data)->newobj) +#define nft_trans_obj_update(trans) \ + (((struct nft_trans_obj *)trans->data)->update) + +struct nft_trans_flowtable { + struct nft_flowtable *flowtable; + bool update; + struct list_head hook_list; + u32 flags; +}; + +#define nft_trans_flowtable(trans) \ + (((struct nft_trans_flowtable *)trans->data)->flowtable) +#define nft_trans_flowtable_update(trans) \ + (((struct nft_trans_flowtable *)trans->data)->update) +#define nft_trans_flowtable_hooks(trans) \ + (((struct nft_trans_flowtable *)trans->data)->hook_list) +#define nft_trans_flowtable_flags(trans) \ + (((struct nft_trans_flowtable *)trans->data)->flags) + +#define NFT_TRANS_GC_BATCHCOUNT 256 + +struct nft_trans_gc { + struct list_head list; + struct net *net; + struct nft_set *set; + u32 seq; + u16 count; + void *priv[NFT_TRANS_GC_BATCHCOUNT]; + struct rcu_head rcu; +}; + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_destroy(struct nft_trans_gc *trans); + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc); + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp); +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); + +void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); + +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq); +struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc); + +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +int __init nft_chain_filter_init(void); +void nft_chain_filter_fini(void); + +void __init nft_chain_route_init(void); +void nft_chain_route_fini(void); + +void nf_tables_trans_destroy_flush_work(void); + +int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); +__be64 nf_jiffies64_to_msecs(u64 input); + +#ifdef CONFIG_MODULES +__printf(2, 3) int nft_request_module(struct net *net, const char *fmt, ...); +#else +static inline int nft_request_module(struct net *net, const char *fmt, ...) { return -ENOENT; } +#endif + +struct nftables_pernet { + struct list_head tables; + struct list_head commit_list; + struct list_head binding_list; + struct list_head module_list; + struct list_head notify_list; + struct mutex commit_mutex; + u64 table_handle; + unsigned int base_seq; + unsigned int gc_seq; + u8 validate_state; +}; + +extern unsigned int nf_tables_net_id; + +static inline struct nftables_pernet *nft_pernet(const struct net *net) +{ + return net_generic(net, nf_tables_net_id); +} + +#define __NFT_REDUCE_READONLY 1UL +#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY + +static inline bool nft_reduce_is_readonly(const struct nft_expr *expr) +{ + return expr->ops->reduce == NFT_REDUCE_READONLY; +} + +void nft_reg_track_update(struct nft_regs_track *track, + const struct nft_expr *expr, u8 dreg, u8 len); +void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len); +void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg); + +static inline bool nft_reg_track_cmp(struct nft_regs_track *track, + const struct nft_expr *expr, u8 dreg) +{ + return track->regs[dreg].selector && + track->regs[dreg].selector->ops == expr->ops && + track->regs[dreg].num_reg == 0; +} + +#endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h new file mode 100644 index 0000000000..780a5f6ad4 --- /dev/null +++ b/include/net/netfilter/nf_tables_core.h @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NET_NF_TABLES_CORE_H +#define _NET_NF_TABLES_CORE_H + +#include <net/netfilter/nf_tables.h> +#include <linux/indirect_call_wrapper.h> + +extern struct nft_expr_type nft_imm_type; +extern struct nft_expr_type nft_cmp_type; +extern struct nft_expr_type nft_counter_type; +extern struct nft_expr_type nft_lookup_type; +extern struct nft_expr_type nft_bitwise_type; +extern struct nft_expr_type nft_byteorder_type; +extern struct nft_expr_type nft_payload_type; +extern struct nft_expr_type nft_dynset_type; +extern struct nft_expr_type nft_range_type; +extern struct nft_expr_type nft_meta_type; +extern struct nft_expr_type nft_rt_type; +extern struct nft_expr_type nft_exthdr_type; +extern struct nft_expr_type nft_last_type; +extern struct nft_expr_type nft_objref_type; +extern struct nft_expr_type nft_inner_type; + +#ifdef CONFIG_NETWORK_SECMARK +extern struct nft_object_type nft_secmark_obj_type; +#endif +extern struct nft_object_type nft_counter_obj_type; + +int nf_tables_core_module_init(void); +void nf_tables_core_module_exit(void); + +struct nft_bitwise_fast_expr { + u32 mask; + u32 xor; + u8 sreg; + u8 dreg; +}; + +struct nft_cmp_fast_expr { + u32 data; + u32 mask; + u8 sreg; + u8 len; + bool inv; +}; + +struct nft_cmp16_fast_expr { + struct nft_data data; + struct nft_data mask; + u8 sreg; + u8 len; + bool inv; +}; + +struct nft_immediate_expr { + struct nft_data data; + u8 dreg; + u8 dlen; +}; + +extern const struct nft_expr_ops nft_cmp_fast_ops; +extern const struct nft_expr_ops nft_cmp16_fast_ops; + +struct nft_ct { + enum nft_ct_keys key:8; + enum ip_conntrack_dir dir:8; + u8 len; + union { + u8 dreg; + u8 sreg; + }; +}; + +struct nft_payload { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + u8 dreg; +}; + +extern const struct nft_expr_ops nft_payload_fast_ops; + +extern const struct nft_expr_ops nft_bitwise_fast_ops; + +extern struct static_key_false nft_counters_enabled; +extern struct static_key_false nft_trace_enabled; + +extern const struct nft_set_type nft_set_rhash_type; +extern const struct nft_set_type nft_set_hash_type; +extern const struct nft_set_type nft_set_hash_fast_type; +extern const struct nft_set_type nft_set_rbtree_type; +extern const struct nft_set_type nft_set_bitmap_type; +extern const struct nft_set_type nft_set_pipapo_type; +extern const struct nft_set_type nft_set_pipapo_avx2_type; + +#ifdef CONFIG_RETPOLINE +bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); +bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); +bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); +bool nft_hash_lookup_fast(const struct net *net, + const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); +bool nft_hash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); +bool nft_set_do_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); +#else +static inline bool +nft_set_do_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + return set->ops->lookup(net, set, key, ext); +} +#endif + +/* called from nft_pipapo_avx2.c */ +bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); +/* called from nft_set_pipapo.c */ +bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); + +void nft_counter_init_seqcount(void); + +struct nft_expr; +struct nft_regs; +struct nft_pktinfo; +void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_cmp_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_lookup_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_payload_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_immediate_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_dynset_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_ct_get_fast_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); + +enum { + NFT_PAYLOAD_CTX_INNER_TUN = (1 << 0), + NFT_PAYLOAD_CTX_INNER_LL = (1 << 1), + NFT_PAYLOAD_CTX_INNER_NH = (1 << 2), + NFT_PAYLOAD_CTX_INNER_TH = (1 << 3), +}; + +struct nft_inner_tun_ctx { + u16 type; + u16 inner_tunoff; + u16 inner_lloff; + u16 inner_nhoff; + u16 inner_thoff; + __be16 llproto; + u8 l4proto; + u8 flags; +}; + +int nft_payload_inner_offset(const struct nft_pktinfo *pkt); +void nft_payload_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *ctx); + +void nft_objref_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_objref_map_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +#endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h new file mode 100644 index 0000000000..60a7d0ce30 --- /dev/null +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_TABLES_IPV4_H_ +#define _NF_TABLES_IPV4_H_ + +#include <net/netfilter/nf_tables.h> +#include <net/ip.h> + +static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt) +{ + struct iphdr *ip; + + ip = ip_hdr(pkt->skb); + pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = ip->protocol; + pkt->thoff = ip_hdrlen(pkt->skb); + pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET; +} + +static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) +{ + struct iphdr *iph, _iph; + u32 len, thoff; + + iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), + sizeof(*iph), &_iph); + if (!iph) + return -1; + + if (iph->ihl < 5 || iph->version != 4) + return -1; + + len = iph_totlen(pkt->skb, iph); + thoff = skb_network_offset(pkt->skb) + (iph->ihl * 4); + if (pkt->skb->len < len) + return -1; + else if (len < thoff) + return -1; + else if (thoff < sizeof(*iph)) + return -1; + + pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = iph->protocol; + pkt->thoff = thoff; + pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; + + return 0; +} + +static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) +{ + if (__nft_set_pktinfo_ipv4_validate(pkt) < 0) + nft_set_pktinfo_unspec(pkt); +} + +static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) +{ + struct iphdr *iph; + u32 len, thoff; + + if (!pskb_may_pull(pkt->skb, sizeof(*iph))) + return -1; + + iph = ip_hdr(pkt->skb); + if (iph->ihl < 5 || iph->version != 4) + goto inhdr_error; + + len = iph_totlen(pkt->skb, iph); + thoff = iph->ihl * 4; + if (pkt->skb->len < len) { + __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INTRUNCATEDPKTS); + return -1; + } else if (len < thoff) { + goto inhdr_error; + } else if (thoff < sizeof(*iph)) { + return -1; + } + + pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = iph->protocol; + pkt->thoff = thoff; + pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; + + return 0; + +inhdr_error: + __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INHDRERRORS); + return -1; +} + +#endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h new file mode 100644 index 0000000000..467d59b9e5 --- /dev/null +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_TABLES_IPV6_H_ +#define _NF_TABLES_IPV6_H_ + +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <net/ipv6.h> +#include <net/netfilter/nf_tables.h> + +static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt) +{ + unsigned int flags = IP6_FH_F_AUTH; + int protohdr, thoff = 0; + unsigned short frag_off; + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); + if (protohdr < 0 || thoff > U16_MAX) { + nft_set_pktinfo_unspec(pkt); + return; + } + + pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = protohdr; + pkt->thoff = thoff; + pkt->fragoff = frag_off; +} + +static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) +{ +#if IS_ENABLED(CONFIG_IPV6) + unsigned int flags = IP6_FH_F_AUTH; + struct ipv6hdr *ip6h, _ip6h; + unsigned int thoff = 0; + unsigned short frag_off; + int protohdr; + u32 pkt_len; + + ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), + sizeof(*ip6h), &_ip6h); + if (!ip6h) + return -1; + + if (ip6h->version != 6) + return -1; + + pkt_len = ntohs(ip6h->payload_len); + if (pkt_len + sizeof(*ip6h) > pkt->skb->len) + return -1; + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); + if (protohdr < 0 || thoff > U16_MAX) + return -1; + + pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = protohdr; + pkt->thoff = thoff; + pkt->fragoff = frag_off; + + return 0; +#else + return -1; +#endif +} + +static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) +{ + if (__nft_set_pktinfo_ipv6_validate(pkt) < 0) + nft_set_pktinfo_unspec(pkt); +} + +static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) +{ +#if IS_ENABLED(CONFIG_IPV6) + unsigned int flags = IP6_FH_F_AUTH; + unsigned short frag_off; + unsigned int thoff = 0; + struct inet6_dev *idev; + struct ipv6hdr *ip6h; + int protohdr; + u32 pkt_len; + + if (!pskb_may_pull(pkt->skb, sizeof(*ip6h))) + return -1; + + ip6h = ipv6_hdr(pkt->skb); + if (ip6h->version != 6) + goto inhdr_error; + + pkt_len = ntohs(ip6h->payload_len); + if (pkt_len + sizeof(*ip6h) > pkt->skb->len) { + idev = __in6_dev_get(nft_in(pkt)); + __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INTRUNCATEDPKTS); + return -1; + } + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); + if (protohdr < 0 || thoff > U16_MAX) + goto inhdr_error; + + pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = protohdr; + pkt->thoff = thoff; + pkt->fragoff = frag_off; + + return 0; + +inhdr_error: + idev = __in6_dev_get(nft_in(pkt)); + __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INHDRERRORS); + return -1; +#else + return -1; +#endif +} + +#endif diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h new file mode 100644 index 0000000000..3568b6a2f5 --- /dev/null +++ b/include/net/netfilter/nf_tables_offload.h @@ -0,0 +1,100 @@ +#ifndef _NET_NF_TABLES_OFFLOAD_H +#define _NET_NF_TABLES_OFFLOAD_H + +#include <net/flow_offload.h> +#include <net/netfilter/nf_tables.h> + +enum nft_offload_reg_flags { + NFT_OFFLOAD_F_NETWORK2HOST = (1 << 0), +}; + +struct nft_offload_reg { + u32 key; + u32 len; + u32 base_offset; + u32 offset; + u32 flags; + struct nft_data data; + struct nft_data mask; +}; + +enum nft_offload_dep_type { + NFT_OFFLOAD_DEP_UNSPEC = 0, + NFT_OFFLOAD_DEP_NETWORK, + NFT_OFFLOAD_DEP_TRANSPORT, +}; + +struct nft_offload_ctx { + struct { + enum nft_offload_dep_type type; + __be16 l3num; + u8 protonum; + } dep; + unsigned int num_actions; + struct net *net; + struct nft_offload_reg regs[NFT_REG32_15 + 1]; +}; + +void nft_offload_set_dependency(struct nft_offload_ctx *ctx, + enum nft_offload_dep_type type); +void nft_offload_update_dependency(struct nft_offload_ctx *ctx, + const void *data, u32 len); + +struct nft_flow_key { + struct flow_dissector_key_basic basic; + struct flow_dissector_key_control control; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_ports tp; + struct flow_dissector_key_ip ip; + struct flow_dissector_key_vlan vlan; + struct flow_dissector_key_vlan cvlan; + struct flow_dissector_key_eth_addrs eth_addrs; + struct flow_dissector_key_meta meta; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct nft_flow_match { + struct flow_dissector dissector; + struct nft_flow_key key; + struct nft_flow_key mask; +}; + +struct nft_flow_rule { + __be16 proto; + struct nft_flow_match match; + struct flow_rule *rule; +}; + +void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, + enum flow_dissector_key_id addr_type); + +struct nft_rule; +struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule); +int nft_flow_rule_stats(const struct nft_chain *chain, const struct nft_rule *rule); +void nft_flow_rule_destroy(struct nft_flow_rule *flow); +int nft_flow_rule_offload_commit(struct net *net); + +#define NFT_OFFLOAD_MATCH_FLAGS(__key, __base, __field, __len, __reg, __flags) \ + (__reg)->base_offset = \ + offsetof(struct nft_flow_key, __base); \ + (__reg)->offset = \ + offsetof(struct nft_flow_key, __base.__field); \ + (__reg)->len = __len; \ + (__reg)->key = __key; \ + (__reg)->flags = __flags; + +#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ + NFT_OFFLOAD_MATCH_FLAGS(__key, __base, __field, __len, __reg, 0) + +#define NFT_OFFLOAD_MATCH_EXACT(__key, __base, __field, __len, __reg) \ + NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ + memset(&(__reg)->mask, 0xff, (__reg)->len); + +bool nft_chain_offload_support(const struct nft_base_chain *basechain); + +int nft_offload_init(void); +void nft_offload_exit(void); + +#endif diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h new file mode 100644 index 0000000000..faa108b1ba --- /dev/null +++ b/include/net/netfilter/nf_tproxy.h @@ -0,0 +1,128 @@ +#ifndef _NF_TPROXY_H_ +#define _NF_TPROXY_H_ + +#include <net/tcp.h> + +enum nf_tproxy_lookup_t { + NF_TPROXY_LOOKUP_LISTENER, + NF_TPROXY_LOOKUP_ESTABLISHED, +}; + +static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) +{ + if (inet_sk_transparent(sk)) + return true; + + sock_gen_put(sk); + return false; +} + +static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw) +{ + local_bh_disable(); + inet_twsk_deschedule_put(tw); + local_bh_enable(); +} + +/* assign a socket to the skb -- consumes sk */ +static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sock_edemux; +} + +__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr); + +/** + * nf_tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections + * @skb: The skb being processed. + * @laddr: IPv4 address to redirect to or zero. + * @lport: TCP port to redirect to or zero. + * @sk: The TIME_WAIT TCP socket found by the lookup. + * + * We have to handle SYN packets arriving to TIME_WAIT sockets + * differently: instead of reopening the connection we should rather + * redirect the new connection to the proxy if there's a listener + * socket present. + * + * nf_tproxy_handle_time_wait4() consumes the socket reference passed in. + * + * Returns the listener socket if there's one, the TIME_WAIT socket if + * no such listener is found, or NULL if the TCP header is incomplete. + */ +struct sock * +nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, + __be32 laddr, __be16 lport, struct sock *sk); + +/* + * This is used when the user wants to intercept a connection matching + * an explicit iptables rule. In this case the sockets are assumed + * matching in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple, it is returned, assuming the redirection + * already took place and we process a packet belonging to an + * established connection + * + * - match: if there's a listening socket matching the redirection + * (e.g. on-port & on-ip of the connection), it is returned, + * regardless if it was bound to 0.0.0.0 or an explicit + * address. The reasoning is that if there's an explicit rule, it + * does not really matter if the listener is bound to an interface + * or to 0. The user already stated that he wants redirection + * (since he added the rule). + * + * Please note that there's an overlap between what a TPROXY target + * and a socket match will match. Normally if you have both rules the + * "socket" match will be the first one, effectively all packets + * belonging to established connections going through that one. + */ +struct sock * +nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, + const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type); + +const struct in6_addr * +nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, + const struct in6_addr *daddr); + +/** + * nf_tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections + * @skb: The skb being processed. + * @tproto: Transport protocol. + * @thoff: Transport protocol header offset. + * @net: Network namespace. + * @laddr: IPv6 address to redirect to. + * @lport: TCP port to redirect to or zero. + * @sk: The TIME_WAIT TCP socket found by the lookup. + * + * We have to handle SYN packets arriving to TIME_WAIT sockets + * differently: instead of reopening the connection we should rather + * redirect the new connection to the proxy if there's a listener + * socket present. + * + * nf_tproxy_handle_time_wait6() consumes the socket reference passed in. + * + * Returns the listener socket if there's one, the TIME_WAIT socket if + * no such listener is found, or NULL if the TCP header is incomplete. + */ +struct sock * +nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, + struct net *net, + const struct in6_addr *laddr, + const __be16 lport, + struct sock *sk); + +struct sock * +nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, + const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type); + +#endif /* _NF_TPROXY_H_ */ diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h new file mode 100644 index 0000000000..167640b843 --- /dev/null +++ b/include/net/netfilter/nft_fib.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NFT_FIB_H_ +#define _NFT_FIB_H_ + +#include <net/netfilter/nf_tables.h> + +struct nft_fib { + u8 dreg; + u8 result; + u32 flags; +}; + +extern const struct nla_policy nft_fib_policy[]; + +static inline bool +nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in) +{ + return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; +} + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]); +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib_store_result(void *reg, const struct nft_fib *priv, + const struct net_device *dev); + +bool nft_fib_reduce(struct nft_regs_track *track, + const struct nft_expr *expr); +#endif diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h new file mode 100644 index 0000000000..ba1238f12a --- /dev/null +++ b/include/net/netfilter/nft_meta.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NFT_META_H_ +#define _NFT_META_H_ + +#include <net/netfilter/nf_tables.h> + +struct nft_meta { + enum nft_meta_keys key:8; + u8 len; + union { + u8 dreg; + u8 sreg; + }; +}; + +extern const struct nla_policy nft_meta_policy[]; + +int nft_meta_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_meta_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset); + +int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset); + +void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr); + +int nft_meta_set_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data); + +bool nft_meta_get_reduce(struct nft_regs_track *track, + const struct nft_expr *expr); + +struct nft_inner_tun_ctx; +void nft_meta_inner_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *tun_ctx); + +#endif diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h new file mode 100644 index 0000000000..6d9ba62efd --- /dev/null +++ b/include/net/netfilter/nft_reject.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NFT_REJECT_H_ +#define _NFT_REJECT_H_ + +#include <linux/types.h> +#include <net/netlink.h> +#include <net/netfilter/nf_tables.h> +#include <uapi/linux/netfilter/nf_tables.h> + +struct nft_reject { + enum nft_reject_types type:8; + u8 icmp_code; +}; + +extern const struct nla_policy nft_reject_policy[]; + +int nft_reject_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data); + +int nft_reject_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_reject_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset); + +int nft_reject_icmp_code(u8 code); +int nft_reject_icmpv6_code(u8 code); + +#endif diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h new file mode 100644 index 0000000000..4c3809e141 --- /dev/null +++ b/include/net/netfilter/xt_rateest.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _XT_RATEEST_H +#define _XT_RATEEST_H + +#include <net/gen_stats.h> + +struct xt_rateest { + /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */ + struct gnet_stats_basic_sync bstats; + spinlock_t lock; + + + /* following fields not accessed in hot path */ + unsigned int refcnt; + struct hlist_node list; + char name[IFNAMSIZ]; + struct gnet_estimator params; + struct rcu_head rcu; + + /* keep this field far away to speedup xt_rateest_mt() */ + struct net_rate_estimator __rcu *rate_est; +}; + +struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name); +void xt_rateest_put(struct net *net, struct xt_rateest *est); + +#endif /* _XT_RATEEST_H */ |