From 2c3c1048746a4622d8c89a29670120dc8fab93c4 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sun, 7 Apr 2024 20:49:45 +0200
Subject: Adding upstream version 6.1.76.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 net/netfilter/nf_nat_masquerade.c | 368 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 368 insertions(+)
 create mode 100644 net/netfilter/nf_nat_masquerade.c

(limited to 'net/netfilter/nf_nat_masquerade.c')

diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
new file mode 100644
index 000000000..1a506b0c6
--- /dev/null
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/atomic.h>
+#include <linux/inetdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/netfilter/nf_nat_masquerade.h>
+
+struct masq_dev_work {
+	struct work_struct work;
+	struct net *net;
+	netns_tracker ns_tracker;
+	union nf_inet_addr addr;
+	int ifindex;
+	int (*iter)(struct nf_conn *i, void *data);
+};
+
+#define MAX_MASQ_WORKER_COUNT	16
+
+static DEFINE_MUTEX(masq_mutex);
+static unsigned int masq_refcnt __read_mostly;
+static atomic_t masq_worker_count __read_mostly;
+
+unsigned int
+nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
+		       const struct nf_nat_range2 *range,
+		       const struct net_device *out)
+{
+	struct nf_conn *ct;
+	struct nf_conn_nat *nat;
+	enum ip_conntrack_info ctinfo;
+	struct nf_nat_range2 newrange;
+	const struct rtable *rt;
+	__be32 newsrc, nh;
+
+	WARN_ON(hooknum != NF_INET_POST_ROUTING);
+
+	ct = nf_ct_get(skb, &ctinfo);
+
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			 ctinfo == IP_CT_RELATED_REPLY)));
+
+	/* Source address is 0.0.0.0 - locally generated packet that is
+	 * probably not supposed to be masqueraded.
+	 */
+	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
+		return NF_ACCEPT;
+
+	rt = skb_rtable(skb);
+	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
+	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
+	if (!newsrc) {
+		pr_info("%s ate my IP address\n", out->name);
+		return NF_DROP;
+	}
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat)
+		nat->masq_index = out->ifindex;
+
+	/* Transfer from original range. */
+	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.ip = newsrc;
+	newrange.max_addr.ip = newsrc;
+	newrange.min_proto   = range->min_proto;
+	newrange.max_proto   = range->max_proto;
+
+	/* Hand modified range to generic setup. */
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+	struct nf_ct_iter_data iter_data = {};
+	struct masq_dev_work *w;
+
+	w = container_of(work, struct masq_dev_work, work);
+
+	iter_data.net = w->net;
+	iter_data.data = (void *)w;
+	nf_ct_iterate_cleanup_net(w->iter, &iter_data);
+
+	put_net_track(w->net, &w->ns_tracker);
+	kfree(w);
+	atomic_dec(&masq_worker_count);
+	module_put(THIS_MODULE);
+}
+
+/* Iterate conntrack table in the background and remove conntrack entries
+ * that use the device/address being removed.
+ *
+ * In case too many work items have been queued already or memory allocation
+ * fails iteration is skipped, conntrack entries will time out eventually.
+ */
+static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
+				 int ifindex,
+				 int (*iter)(struct nf_conn *i, void *data),
+				 gfp_t gfp_flags)
+{
+	struct masq_dev_work *w;
+
+	if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
+		return;
+
+	net = maybe_get_net(net);
+	if (!net)
+		return;
+
+	if (!try_module_get(THIS_MODULE))
+		goto err_module;
+
+	w = kzalloc(sizeof(*w), gfp_flags);
+	if (w) {
+		/* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
+		atomic_inc(&masq_worker_count);
+
+		INIT_WORK(&w->work, iterate_cleanup_work);
+		w->ifindex = ifindex;
+		w->net = net;
+		netns_tracker_alloc(net, &w->ns_tracker, gfp_flags);
+		w->iter = iter;
+		if (addr)
+			w->addr = *addr;
+		schedule_work(&w->work);
+		return;
+	}
+
+	module_put(THIS_MODULE);
+err_module:
+	put_net(net);
+}
+
+static int device_cmp(struct nf_conn *i, void *arg)
+{
+	const struct nf_conn_nat *nat = nfct_nat(i);
+	const struct masq_dev_work *w = arg;
+
+	if (!nat)
+		return 0;
+	return nat->masq_index == w->ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+			     unsigned long event,
+			     void *ptr)
+{
+	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+
+	if (event == NETDEV_DOWN) {
+		/* Device was downed.  Search entire table for
+		 * conntracks which were associated with that device,
+		 * and forget them.
+		 */
+
+		nf_nat_masq_schedule(net, NULL, dev->ifindex,
+				     device_cmp, GFP_KERNEL);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int inet_cmp(struct nf_conn *ct, void *ptr)
+{
+	struct nf_conntrack_tuple *tuple;
+	struct masq_dev_work *w = ptr;
+
+	if (!device_cmp(ct, ptr))
+		return 0;
+
+	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
+}
+
+static int masq_inet_event(struct notifier_block *this,
+			   unsigned long event,
+			   void *ptr)
+{
+	const struct in_ifaddr *ifa = ptr;
+	const struct in_device *idev;
+	const struct net_device *dev;
+	union nf_inet_addr addr;
+
+	if (event != NETDEV_DOWN)
+		return NOTIFY_DONE;
+
+	/* The masq_dev_notifier will catch the case of the device going
+	 * down.  So if the inetdev is dead and being destroyed we have
+	 * no work to do.  Otherwise this is an individual address removal
+	 * and we have to perform the flush.
+ */
+	idev = ifa->ifa_dev;
+	if (idev->dead)
+		return NOTIFY_DONE;
+
+	memset(&addr, 0, sizeof(addr));
+
+	addr.ip = ifa->ifa_address;
+
+	dev = idev->dev;
+	nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
+			     inet_cmp, GFP_KERNEL);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+	.notifier_call	= masq_device_event,
+};
+
+static struct notifier_block masq_inet_notifier = {
+	.notifier_call	= masq_inet_event,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int
+nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
+		       const struct in6_addr *daddr, unsigned int srcprefs,
+		       struct in6_addr *saddr)
+{
+#ifdef CONFIG_IPV6_MODULE
+	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+	if (!v6_ops)
+		return -EHOSTUNREACH;
+
+	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#else
+	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#endif
+}
+
+unsigned int
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
+		       const struct net_device *out)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_nat *nat;
+	struct in6_addr src;
+	struct nf_conn *ct;
+	struct nf_nat_range2 newrange;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			 ctinfo == IP_CT_RELATED_REPLY)));
+
+	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
+				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+		return NF_DROP;
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat)
+		nat->masq_index = out->ifindex;
+
+	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.in6	= src;
+	newrange.max_addr.in6	= src;
+	newrange.min_proto	= range->min_proto;
+	newrange.max_proto	= range->max_proto;
+
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
+
+/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
+ *
+ * Defer it to the system workqueue.
+ *
+ * As we can have 'a lot' of inet_events (depending on amount of ipv6
+ * addresses being deleted), we also need to limit work item queue.
+ */
+static int masq_inet6_event(struct notifier_block *this,
+			    unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *ifa = ptr;
+	const struct net_device *dev;
+	union nf_inet_addr addr;
+
+	if (event != NETDEV_DOWN)
+		return NOTIFY_DONE;
+
+	dev = ifa->idev->dev;
+
+	memset(&addr, 0, sizeof(addr));
+
+	addr.in6 = ifa->addr;
+
+	nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
+			     GFP_ATOMIC);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_inet6_notifier = {
+	.notifier_call	= masq_inet6_event,
+};
+
+static int nf_nat_masquerade_ipv6_register_notifier(void)
+{
+	return register_inet6addr_notifier(&masq_inet6_notifier);
+}
+#else
+static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
+#endif
+
+int nf_nat_masquerade_inet_register_notifiers(void)
+{
+	int ret = 0;
+
+	mutex_lock(&masq_mutex);
+	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
+		ret = -EOVERFLOW;
+		goto out_unlock;
+	}
+
+	/* check if the notifier was already set */
+	if (++masq_refcnt > 1)
+		goto out_unlock;
+
+	/* Register for device down reports */
+	ret = register_netdevice_notifier(&masq_dev_notifier);
+	if (ret)
+		goto err_dec;
+	/* Register IP address change reports */
+	ret = register_inetaddr_notifier(&masq_inet_notifier);
+	if (ret)
+		goto err_unregister;
+
+	ret = nf_nat_masquerade_ipv6_register_notifier();
+	if (ret)
+		goto err_unreg_inet;
+
+	mutex_unlock(&masq_mutex);
+	return ret;
+err_unreg_inet:
+	unregister_inetaddr_notifier(&masq_inet_notifier);
+err_unregister:
+	unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+	masq_refcnt--;
+out_unlock:
+	mutex_unlock(&masq_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);
+
+void nf_nat_masquerade_inet_unregister_notifiers(void)
+{
+	mutex_lock(&masq_mutex);
+	/* check if the notifiers still have clients */
+	if (--masq_refcnt > 0)
+		goto out_unlock;
+
+	unregister_netdevice_notifier(&masq_dev_notifier);
+	unregister_inetaddr_notifier(&masq_inet_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+	unregister_inet6addr_notifier(&masq_inet6_notifier);
+#endif
+out_unlock:
+	mutex_unlock(&masq_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
-- 
cgit v1.2.3
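For context, a minimal sketch (not part of the patch above) of how a packet-path consumer drives nf_nat_masquerade_ipv4(). It is modeled on the in-tree xt_MASQUERADE target: the rule supplies only the flags and the layer-4 port range, because the helper selects the source address itself from the route out of the egress device. The demo_ name is hypothetical.

#include <linux/netfilter/x_tables.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_masquerade.h>

/* Hypothetical POST_ROUTING target, in the spirit of xt_MASQUERADE. */
static unsigned int
demo_masq_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
	struct nf_nat_range2 range;

	/* Carry only flags and the L4 port range from the rule;
	 * nf_nat_masquerade_ipv4() picks the source address based on
	 * the route out of xt_out(par).
	 */
	range.flags	= mr->range[0].flags;
	range.min_proto	= mr->range[0].min;
	range.max_proto	= mr->range[0].max;

	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
				      xt_out(par));
}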
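Notifier registration is refcounted behind masq_mutex, so several NAT backends (iptables, ip6tables, nft masquerading) can share one set of netdev/inetaddr/inet6addr notifiers. A sketch of the intended pairing in a consumer module's init/exit paths follows; the demo_*_targets() helpers are assumed placeholders, not real API.

static int __init demo_masq_init(void)
{
	int ret = demo_register_targets();	/* assumed helper */

	if (ret)
		return ret;

	/* First caller registers the notifiers; later callers only
	 * increment masq_refcnt.
	 */
	ret = nf_nat_masquerade_inet_register_notifiers();
	if (ret)
		demo_unregister_targets();	/* assumed helper */

	return ret;
}

static void __exit demo_masq_exit(void)
{
	demo_unregister_targets();		/* assumed helper */

	/* Drops masq_refcnt; only the last caller actually
	 * unregisters the notifiers.
	 */
	nf_nat_masquerade_inet_unregister_notifiers();
}

module_init(demo_masq_init);
module_exit(demo_masq_exit);
MODULE_LICENSE("GPL");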