diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
commit | ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch) | |
tree | b2d64bc10158fdd5497876388cd68142ca374ed3 /net/ipv4/netfilter | |
parent | Initial commit. (diff) | |
download | linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip |
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'net/ipv4/netfilter')
28 files changed, 7983 insertions, 0 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig new file mode 100644 index 0000000000..f71a7e9a7d --- /dev/null +++ b/net/ipv4/netfilter/Kconfig @@ -0,0 +1,344 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# IP netfilter configuration +# + +menu "IP: Netfilter Configuration" + depends on INET && NETFILTER + +config NF_DEFRAG_IPV4 + tristate + default n + +config NF_SOCKET_IPV4 + tristate "IPv4 socket lookup support" + help + This option enables the IPv4 socket lookup infrastructure. This is + is required by the {ip,nf}tables socket match. + +config NF_TPROXY_IPV4 + tristate "IPv4 tproxy support" + +if NF_TABLES + +config NF_TABLES_IPV4 + bool "IPv4 nf_tables support" + help + This option enables the IPv4 support for nf_tables. + +if NF_TABLES_IPV4 + +config NFT_REJECT_IPV4 + select NF_REJECT_IPV4 + default NFT_REJECT + tristate + +config NFT_DUP_IPV4 + tristate "IPv4 nf_tables packet duplication support" + depends on !NF_CONNTRACK || NF_CONNTRACK + select NF_DUP_IPV4 + help + This module enables IPv4 packet duplication support for nf_tables. + +config NFT_FIB_IPV4 + select NFT_FIB + tristate "nf_tables fib / ip route lookup support" + help + This module enables IPv4 FIB lookups, e.g. for reverse path filtering. + It also allows query of the FIB for the route type, e.g. local, unicast, + multicast or blackhole. + +endif # NF_TABLES_IPV4 + +config NF_TABLES_ARP + bool "ARP nf_tables support" + select NETFILTER_FAMILY_ARP + help + This option enables the ARP support for nf_tables. + +endif # NF_TABLES + +config NF_DUP_IPV4 + tristate "Netfilter IPv4 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK + help + This option enables the nf_dup_ipv4 core, which duplicates an IPv4 + packet to be rerouted to another destination. + +config NF_LOG_ARP + tristate "ARP packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_SYSLOG + help + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects CONFIG_NF_LOG_SYSLOG. + +config NF_LOG_IPV4 + tristate "IPv4 packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_SYSLOG + help + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects CONFIG_NF_LOG_SYSLOG. + +config NF_REJECT_IPV4 + tristate "IPv4 packet rejection" + default m if NETFILTER_ADVANCED=n + +if NF_NAT +config NF_NAT_SNMP_BASIC + tristate "Basic SNMP-ALG support" + depends on NF_CONNTRACK_SNMP + depends on NETFILTER_ADVANCED + default NF_NAT && NF_CONNTRACK_SNMP + select ASN1 + help + + This module implements an Application Layer Gateway (ALG) for + SNMP payloads. In conjunction with NAT, it allows a network + management system to access multiple private networks with + conflicting addresses. It works by modifying IP addresses + inside SNMP payloads to match IP-layer NAT mapping. + + This is the "basic" form of SNMP-ALG, as described in RFC 2962 + + To compile it as a module, choose M here. If unsure, say N. + +config NF_NAT_PPTP + tristate + depends on NF_CONNTRACK + default NF_CONNTRACK_PPTP + +config NF_NAT_H323 + tristate + depends on NF_CONNTRACK + default NF_CONNTRACK_H323 + +endif # NF_NAT + +config IP_NF_IPTABLES + tristate "IP tables support (required for filtering/masq/NAT)" + default m if NETFILTER_ADVANCED=n + select NETFILTER_XTABLES + help + iptables is a general, extensible packet identification framework. + The packet filtering and full NAT (masquerading, port forwarding, + etc) subsystems now use this: say `Y' or `M' here if you want to use + either of those. + + To compile it as a module, choose M here. If unsure, say N. + +if IP_NF_IPTABLES + +# The matches. +config IP_NF_MATCH_AH + tristate '"ah" match support' + depends on NETFILTER_ADVANCED + help + This match extension allows you to match a range of SPIs + inside AH header of IPSec packets. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_ECN + tristate '"ecn" match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_ECN + help + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MATCH_ECN. + +config IP_NF_MATCH_RPFILTER + tristate '"rpfilter" reverse path filter match support' + depends on NETFILTER_ADVANCED + depends on IP_NF_MANGLE || IP_NF_RAW + help + This option allows you to match packets whose replies would + go out via the interface the packet came in. + + To compile it as a module, choose M here. If unsure, say N. + The module will be called ipt_rpfilter. + +config IP_NF_MATCH_TTL + tristate '"ttl" match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_HL + help + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MATCH_HL. + +# `filter', generic and specific targets +config IP_NF_FILTER + tristate "Packet filtering" + default m if NETFILTER_ADVANCED=n + help + Packet filtering defines a table `filter', which has a series of + rules for simple packet filtering at local input, forwarding and + local output. See the man page for iptables(8). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_REJECT + tristate "REJECT target support" + depends on IP_NF_FILTER + select NF_REJECT_IPV4 + default m if NETFILTER_ADVANCED=n + help + The REJECT target allows a filtering rule to specify that an ICMP + error should be issued in response to an incoming packet, rather + than silently being dropped. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_SYNPROXY + tristate "SYNPROXY target support" + depends on NF_CONNTRACK && NETFILTER_ADVANCED + select NETFILTER_SYNPROXY + select SYN_COOKIES + help + The SYNPROXY target allows you to intercept TCP connections and + establish them using syncookies before they are passed on to the + server. This allows to avoid conntrack and server resource usage + during SYN-flood attacks. + + To compile it as a module, choose M here. If unsure, say N. + +# NAT + specific targets: nf_conntrack +config IP_NF_NAT + tristate "iptables NAT support" + depends on NF_CONNTRACK + default m if NETFILTER_ADVANCED=n + select NF_NAT + select NETFILTER_XT_NAT + help + This enables the `nat' table in iptables. This allows masquerading, + port forwarding and other forms of full Network Address Port + Translation. + + To compile it as a module, choose M here. If unsure, say N. + +if IP_NF_NAT + +config IP_NF_TARGET_MASQUERADE + tristate "MASQUERADE target support" + select NETFILTER_XT_TARGET_MASQUERADE + help + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE. + +config IP_NF_TARGET_NETMAP + tristate "NETMAP target support" + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_NETMAP + help + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_NETMAP. + +config IP_NF_TARGET_REDIRECT + tristate "REDIRECT target support" + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_REDIRECT + help + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_REDIRECT. + +endif # IP_NF_NAT + +# mangle + specific targets +config IP_NF_MANGLE + tristate "Packet mangling" + default m if NETFILTER_ADVANCED=n + help + This option adds a `mangle' table to iptables: see the man page for + iptables(8). This table is used for various packet alterations + which can effect how the packet is routed. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_ECN + tristate "ECN target support" + depends on IP_NF_MANGLE + depends on NETFILTER_ADVANCED + help + This option adds a `ECN' target, which can be used in the iptables mangle + table. + + You can use this target to remove the ECN bits from the IPv4 header of + an IP packet. This is particularly useful, if you need to work around + existing ECN blackholes on the internet, but don't want to disable + ECN support in general. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_TTL + tristate '"TTL" target support' + depends on NETFILTER_ADVANCED && IP_NF_MANGLE + select NETFILTER_XT_TARGET_HL + help + This is a backwards-compatible option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_HL. + +# raw + specific targets +config IP_NF_RAW + tristate 'raw table support (required for NOTRACK/TRACE)' + help + This option adds a `raw' table to iptables. This table is the very + first in the netfilter framework and hooks in at the PREROUTING + and OUTPUT chains. + + If you want to compile it as a module, say M here and read + <file:Documentation/kbuild/modules.rst>. If unsure, say `N'. + +# security table for MAC policy +config IP_NF_SECURITY + tristate "Security table" + depends on SECURITY + depends on NETFILTER_ADVANCED + help + This option adds a `security' table to iptables, for use + with Mandatory Access Control (MAC) policy. + + If unsure, say N. + +endif # IP_NF_IPTABLES + +# ARP tables +config IP_NF_ARPTABLES + tristate "ARP tables support" + select NETFILTER_XTABLES + select NETFILTER_FAMILY_ARP + depends on NETFILTER_ADVANCED + help + arptables is a general, extensible packet identification framework. + The ARP packet filtering and mangling (manipulation)subsystems + use this: say Y or M here if you want to use either of those. + + To compile it as a module, choose M here. If unsure, say N. + +if IP_NF_ARPTABLES + +config IP_NF_ARPFILTER + tristate "ARP packet filtering" + help + ARP packet filtering defines a table `filter', which has a series of + rules for simple ARP packet filtering at local input and + local output. On a bridge, you can also specify filtering rules + for forwarded ARP packets. See the man page for arptables(8). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_ARP_MANGLE + tristate "ARP payload mangling" + help + Allows altering the ARP packet payload: source and destination + hardware and network addresses. + +endif # IP_NF_ARPTABLES + +endmenu + diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile new file mode 100644 index 0000000000..5a26f9de1a --- /dev/null +++ b/net/ipv4/netfilter/Makefile @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the netfilter modules on top of IPv4. +# + +# defrag +obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o + +obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o +obj-$(CONFIG_NF_TPROXY_IPV4) += nf_tproxy_ipv4.o + +# reject +obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o + +# NAT helpers (nf_conntrack) +obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o +obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o + +nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o +$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h +obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o + +obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o +obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o + +# generic IP tables +obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o + +# the three instances of ip_tables +obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o +obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o +obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o +obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o + +# matches +obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o +obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o + +# targets +obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o +obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o +obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o + +# generic ARP tables +obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o +obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o + +# just filtering instance of ARP tables for now +obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o + +obj-$(CONFIG_NF_DUP_IPV4) += nf_dup_ipv4.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c new file mode 100644 index 0000000000..2407066b0f --- /dev/null +++ b/net/ipv4/netfilter/arp_tables.c @@ -0,0 +1,1667 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Packet matching code for ARP packets. + * + * Based heavily, if not almost entirely, upon ip_tables.c framework. + * + * Some ARP specific bits are: + * + * Copyright (C) 2002 David S. Miller (davem@redhat.com) + * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net> + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/capability.h> +#include <linux/if_arp.h> +#include <linux/kmod.h> +#include <linux/vmalloc.h> +#include <linux/proc_fs.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/mutex.h> +#include <linux/err.h> +#include <net/compat.h> +#include <net/sock.h> +#include <linux/uaccess.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_arp/arp_tables.h> +#include "../../netfilter/xt_repldata.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); +MODULE_DESCRIPTION("arptables core"); + +void *arpt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(arpt, ARPT); +} +EXPORT_SYMBOL_GPL(arpt_alloc_initial_table); + +static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, + const char *hdr_addr, int len) +{ + int i, ret; + + if (len > ARPT_DEV_ADDR_LEN_MAX) + len = ARPT_DEV_ADDR_LEN_MAX; + + ret = 0; + for (i = 0; i < len; i++) + ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i]; + + return ret != 0; +} + +/* + * Unfortunately, _b and _mask are not aligned to an int (or long int) + * Some arches dont care, unrolling the loop is a win on them. + * For other arches, we only have a 16bit alignement. + */ +static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + unsigned long ret = ifname_compare_aligned(_a, _b, _mask); +#else + unsigned long ret = 0; + const u16 *a = (const u16 *)_a; + const u16 *b = (const u16 *)_b; + const u16 *mask = (const u16 *)_mask; + int i; + + for (i = 0; i < IFNAMSIZ/sizeof(u16); i++) + ret |= (a[i] ^ b[i]) & mask[i]; +#endif + return ret; +} + +/* Returns whether packet matches rule or not. */ +static inline int arp_packet_match(const struct arphdr *arphdr, + struct net_device *dev, + const char *indev, + const char *outdev, + const struct arpt_arp *arpinfo) +{ + const char *arpptr = (char *)(arphdr + 1); + const char *src_devaddr, *tgt_devaddr; + __be32 src_ipaddr, tgt_ipaddr; + long ret; + + if (NF_INVF(arpinfo, ARPT_INV_ARPOP, + (arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop)) + return 0; + + if (NF_INVF(arpinfo, ARPT_INV_ARPHRD, + (arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd)) + return 0; + + if (NF_INVF(arpinfo, ARPT_INV_ARPPRO, + (arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro)) + return 0; + + if (NF_INVF(arpinfo, ARPT_INV_ARPHLN, + (arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln)) + return 0; + + src_devaddr = arpptr; + arpptr += dev->addr_len; + memcpy(&src_ipaddr, arpptr, sizeof(u32)); + arpptr += sizeof(u32); + tgt_devaddr = arpptr; + arpptr += dev->addr_len; + memcpy(&tgt_ipaddr, arpptr, sizeof(u32)); + + if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR, + arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, + dev->addr_len)) || + NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR, + arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, + dev->addr_len))) + return 0; + + if (NF_INVF(arpinfo, ARPT_INV_SRCIP, + (src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr) || + NF_INVF(arpinfo, ARPT_INV_TGTIP, + (tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr)) + return 0; + + /* Look for ifname matches. */ + ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); + + if (NF_INVF(arpinfo, ARPT_INV_VIA_IN, ret != 0)) + return 0; + + ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); + + if (NF_INVF(arpinfo, ARPT_INV_VIA_OUT, ret != 0)) + return 0; + + return 1; +} + +static inline int arp_checkentry(const struct arpt_arp *arp) +{ + if (arp->flags & ~ARPT_F_MASK) + return 0; + if (arp->invflags & ~ARPT_INV_MASK) + return 0; + + return 1; +} + +static unsigned int +arpt_error(struct sk_buff *skb, const struct xt_action_param *par) +{ + net_err_ratelimited("arp_tables: error: '%s'\n", + (const char *)par->targinfo); + + return NF_DROP; +} + +static inline const struct xt_entry_target * +arpt_get_target_c(const struct arpt_entry *e) +{ + return arpt_get_target((struct arpt_entry *)e); +} + +static inline struct arpt_entry * +get_entry(const void *base, unsigned int offset) +{ + return (struct arpt_entry *)(base + offset); +} + +static inline +struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + +unsigned int arpt_do_table(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct xt_table *table = priv; + unsigned int hook = state->hook; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); + unsigned int verdict = NF_DROP; + const struct arphdr *arp; + struct arpt_entry *e, **jumpstack; + const char *indev, *outdev; + const void *table_base; + unsigned int cpu, stackidx = 0; + const struct xt_table_info *private; + struct xt_action_param acpar; + unsigned int addend; + + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) + return NF_DROP; + + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; + + local_bh_disable(); + addend = xt_write_recseq_begin(); + private = READ_ONCE(table->private); /* Address dependency. */ + cpu = smp_processor_id(); + table_base = private->entries; + jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; + + /* No TEE support for arptables, so no need to switch to alternate + * stack. All targets that reenter must return absolute verdicts. + */ + e = get_entry(table_base, private->hook_entry[hook]); + + acpar.state = state; + acpar.hotdrop = false; + + arp = arp_hdr(skb); + do { + const struct xt_entry_target *t; + struct xt_counters *counter; + + if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { + e = arpt_next_entry(e); + continue; + } + + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, arp_hdr_len(skb->dev), 1); + + t = arpt_get_target_c(e); + + /* Standard target? */ + if (!t->u.kernel.target->target) { + int v; + + v = ((struct xt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? */ + if (v != XT_RETURN) { + verdict = (unsigned int)(-v) - 1; + break; + } + if (stackidx == 0) { + e = get_entry(table_base, + private->underflow[hook]); + } else { + e = jumpstack[--stackidx]; + e = arpt_next_entry(e); + } + continue; + } + if (table_base + v + != arpt_next_entry(e)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } + jumpstack[stackidx++] = e; + } + + e = get_entry(table_base, v); + continue; + } + + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; + verdict = t->u.kernel.target->target(skb, &acpar); + + if (verdict == XT_CONTINUE) { + /* Target might have changed stuff. */ + arp = arp_hdr(skb); + e = arpt_next_entry(e); + } else { + /* Verdict */ + break; + } + } while (!acpar.hotdrop); + xt_write_recseq_end(addend); + local_bh_enable(); + + if (acpar.hotdrop) + return NF_DROP; + else + return verdict; +} + +/* All zeroes == unconditional rule. */ +static inline bool unconditional(const struct arpt_entry *e) +{ + static const struct arpt_arp uncond; + + return e->target_offset == sizeof(struct arpt_entry) && + memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; +} + +/* Figures out from what hook each rule can be called: returns 0 if + * there are loops. Puts hook bitmask in comefrom. + */ +static int mark_source_chains(const struct xt_table_info *newinfo, + unsigned int valid_hooks, void *entry0, + unsigned int *offsets) +{ + unsigned int hook; + + /* No recursion; use packet counter to save back ptrs (reset + * to 0 as we leave), and comefrom to save source hook bitmask. + */ + for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { + unsigned int pos = newinfo->hook_entry[hook]; + struct arpt_entry *e = entry0 + pos; + + if (!(valid_hooks & (1 << hook))) + continue; + + /* Set initial back pointer. */ + e->counters.pcnt = pos; + + for (;;) { + const struct xt_standard_target *t + = (void *)arpt_get_target_c(e); + int visited = e->comefrom & (1 << hook); + + if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) + return 0; + + e->comefrom + |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); + + /* Unconditional return/END. */ + if ((unconditional(e) && + (strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0) && + t->verdict < 0) || visited) { + unsigned int oldpos, size; + + /* Return: backtrack through the last + * big jump. + */ + do { + e->comefrom ^= (1<<NF_ARP_NUMHOOKS); + oldpos = pos; + pos = e->counters.pcnt; + e->counters.pcnt = 0; + + /* We're at the start. */ + if (pos == oldpos) + goto next; + + e = entry0 + pos; + } while (oldpos == pos + e->next_offset); + + /* Move along one */ + size = e->next_offset; + e = entry0 + pos + size; + if (pos + size >= newinfo->size) + return 0; + e->counters.pcnt = pos; + pos += size; + } else { + int newpos = t->verdict; + + if (strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0 && + newpos >= 0) { + /* This a jump; chase it. */ + if (!xt_find_jump_offset(offsets, newpos, + newinfo->number)) + return 0; + } else { + /* ... this is a fallthru */ + newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; + } + e = entry0 + newpos; + e->counters.pcnt = pos; + pos = newpos; + } + } +next: ; + } + return 1; +} + +static int check_target(struct arpt_entry *e, struct net *net, const char *name) +{ + struct xt_entry_target *t = arpt_get_target(e); + struct xt_tgchk_param par = { + .net = net, + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_ARP, + }; + + return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); +} + +static int +find_check_entry(struct arpt_entry *e, struct net *net, const char *name, + unsigned int size, + struct xt_percpu_counter_alloc_state *alloc_state) +{ + struct xt_entry_target *t; + struct xt_target *target; + int ret; + + if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) + return -ENOMEM; + + t = arpt_get_target(e); + target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + ret = PTR_ERR(target); + goto out; + } + t->u.kernel.target = target; + + ret = check_target(e, net, name); + if (ret) + goto err; + return 0; +err: + module_put(t->u.kernel.target->me); +out: + xt_percpu_counter_free(&e->counters); + + return ret; +} + +static bool check_underflow(const struct arpt_entry *e) +{ + const struct xt_entry_target *t; + unsigned int verdict; + + if (!unconditional(e)) + return false; + t = arpt_get_target_c(e); + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) + return false; + verdict = ((struct xt_standard_target *)t)->verdict; + verdict = -verdict - 1; + return verdict == NF_DROP || verdict == NF_ACCEPT; +} + +static inline int check_entry_size_and_hooks(struct arpt_entry *e, + struct xt_table_info *newinfo, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, + unsigned int valid_hooks) +{ + unsigned int h; + int err; + + if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || + (unsigned char *)e + sizeof(struct arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) + return -EINVAL; + + if (e->next_offset + < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) + return -EINVAL; + + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); + if (err) + return err; + + /* Check hooks & underflows */ + for (h = 0; h < NF_ARP_NUMHOOKS; h++) { + if (!(valid_hooks & (1 << h))) + continue; + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + if ((unsigned char *)e - base == underflows[h]) { + if (!check_underflow(e)) + return -EINVAL; + + newinfo->underflow[h] = underflows[h]; + } + } + + /* Clear counters and comefrom */ + e->counters = ((struct xt_counters) { 0, 0 }); + e->comefrom = 0; + return 0; +} + +static void cleanup_entry(struct arpt_entry *e, struct net *net) +{ + struct xt_tgdtor_param par; + struct xt_entry_target *t; + + t = arpt_get_target(e); + par.net = net; + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_ARP; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); + xt_percpu_counter_free(&e->counters); +} + +/* Checks and translates the user-supplied table segment (held in + * newinfo). + */ +static int translate_table(struct net *net, + struct xt_table_info *newinfo, + void *entry0, + const struct arpt_replace *repl) +{ + struct xt_percpu_counter_alloc_state alloc_state = { 0 }; + struct arpt_entry *iter; + unsigned int *offsets; + unsigned int i; + int ret = 0; + + newinfo->size = repl->size; + newinfo->number = repl->num_entries; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + newinfo->hook_entry[i] = 0xFFFFFFFF; + newinfo->underflow[i] = 0xFFFFFFFF; + } + + offsets = xt_alloc_entry_offsets(newinfo->number); + if (!offsets) + return -ENOMEM; + i = 0; + + /* Walk through entries, checking offsets. */ + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); + if (ret != 0) + goto out_free; + if (i < repl->num_entries) + offsets[i] = (void *)iter - entry0; + ++i; + if (strcmp(arpt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } + + ret = -EINVAL; + if (i != repl->num_entries) + goto out_free; + + ret = xt_check_table_hooks(newinfo, repl->valid_hooks); + if (ret) + goto out_free; + + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { + ret = -ELOOP; + goto out_free; + } + kvfree(offsets); + + /* Finally, each sanity check must pass */ + i = 0; + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, net, repl->name, repl->size, + &alloc_state); + if (ret != 0) + break; + ++i; + } + + if (ret != 0) { + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter, net); + } + return ret; + } + + return ret; + out_free: + kvfree(offsets); + return ret; +} + +static void get_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct arpt_entry *iter; + unsigned int cpu; + unsigned int i; + + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); + + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + struct xt_counters *tmp; + u64 bcnt, pcnt; + unsigned int start; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + do { + start = read_seqcount_begin(s); + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; + } while (read_seqcount_retry(s, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); + ++i; + cond_resched(); + } + } +} + +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct arpt_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; + } + cond_resched(); + } +} + +static struct xt_counters *alloc_counters(const struct xt_table *table) +{ + unsigned int countersize; + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + + /* We need atomic snapshot of counters: rest doesn't change + * (other than comefrom, which userspace doesn't care + * about). + */ + countersize = sizeof(struct xt_counters) * private->number; + counters = vzalloc(countersize); + + if (counters == NULL) + return ERR_PTR(-ENOMEM); + + get_counters(private, counters); + + return counters; +} + +static int copy_entries_to_user(unsigned int total_size, + const struct xt_table *table, + void __user *userptr) +{ + unsigned int off, num; + const struct arpt_entry *e; + struct xt_counters *counters; + struct xt_table_info *private = table->private; + int ret = 0; + void *loc_cpu_entry; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + loc_cpu_entry = private->entries; + + /* FIXME: use iterator macros --RR */ + /* ... then go back and fix counters and names */ + for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ + const struct xt_entry_target *t; + + e = loc_cpu_entry + off; + if (copy_to_user(userptr + off, e, sizeof(*e))) { + ret = -EFAULT; + goto free_counters; + } + if (copy_to_user(userptr + off + + offsetof(struct arpt_entry, counters), + &counters[num], + sizeof(counters[num])) != 0) { + ret = -EFAULT; + goto free_counters; + } + + t = arpt_get_target_c(e); + if (xt_target_to_user(t, userptr + off + e->target_offset)) { + ret = -EFAULT; + goto free_counters; + } + } + + free_counters: + vfree(counters); + return ret; +} + +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT +static void compat_standard_from_user(void *dst, const void *src) +{ + int v = *(compat_int_t *)src; + + if (v > 0) + v += xt_compat_calc_jump(NFPROTO_ARP, v); + memcpy(dst, &v, sizeof(v)); +} + +static int compat_standard_to_user(void __user *dst, const void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv > 0) + cv -= xt_compat_calc_jump(NFPROTO_ARP, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; +} + +static int compat_calc_entry(const struct arpt_entry *e, + const struct xt_table_info *info, + const void *base, struct xt_table_info *newinfo) +{ + const struct xt_entry_target *t; + unsigned int entry_offset; + int off, i, ret; + + off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + entry_offset = (void *)e - base; + + t = arpt_get_target_c(e); + off += xt_compat_target_offset(t->u.kernel.target); + newinfo->size -= off; + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + if (info->hook_entry[i] && + (e < (struct arpt_entry *)(base + info->hook_entry[i]))) + newinfo->hook_entry[i] -= off; + if (info->underflow[i] && + (e < (struct arpt_entry *)(base + info->underflow[i]))) + newinfo->underflow[i] -= off; + } + return 0; +} + +static int compat_table_info(const struct xt_table_info *info, + struct xt_table_info *newinfo) +{ + struct arpt_entry *iter; + const void *loc_cpu_entry; + int ret; + + if (!newinfo || !info) + return -EINVAL; + + /* we dont care about newinfo->entries */ + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + newinfo->initial_entries = 0; + loc_cpu_entry = info->entries; + ret = xt_compat_init_offsets(NFPROTO_ARP, info->number); + if (ret) + return ret; + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; +} +#endif + +static int get_info(struct net *net, void __user *user, const int *len) +{ + char name[XT_TABLE_MAXNAMELEN]; + struct xt_table *t; + int ret; + + if (*len != sizeof(struct arpt_getinfo)) + return -EINVAL; + + if (copy_from_user(name, user, sizeof(name)) != 0) + return -EFAULT; + + name[XT_TABLE_MAXNAMELEN-1] = '\0'; +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + if (in_compat_syscall()) + xt_compat_lock(NFPROTO_ARP); +#endif + t = xt_request_find_table_lock(net, NFPROTO_ARP, name); + if (!IS_ERR(t)) { + struct arpt_getinfo info; + const struct xt_table_info *private = t->private; +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + struct xt_table_info tmp; + + if (in_compat_syscall()) { + ret = compat_table_info(private, &tmp); + xt_compat_flush_offsets(NFPROTO_ARP); + private = &tmp; + } +#endif + memset(&info, 0, sizeof(info)); + info.valid_hooks = t->valid_hooks; + memcpy(info.hook_entry, private->hook_entry, + sizeof(info.hook_entry)); + memcpy(info.underflow, private->underflow, + sizeof(info.underflow)); + info.num_entries = private->number; + info.size = private->size; + strcpy(info.name, name); + + if (copy_to_user(user, &info, *len) != 0) + ret = -EFAULT; + else + ret = 0; + xt_table_unlock(t); + module_put(t->me); + } else + ret = PTR_ERR(t); +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + if (in_compat_syscall()) + xt_compat_unlock(NFPROTO_ARP); +#endif + return ret; +} + +static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, + const int *len) +{ + int ret; + struct arpt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) + return -EINVAL; + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct arpt_get_entries) + get.size) + return -EINVAL; + + get.name[sizeof(get.name) - 1] = '\0'; + + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); + if (!IS_ERR(t)) { + const struct xt_table_info *private = t->private; + + if (get.size == private->size) + ret = copy_entries_to_user(private->size, + t, uptr->entrytable); + else + ret = -EAGAIN; + + module_put(t->me); + xt_table_unlock(t); + } else + ret = PTR_ERR(t); + + return ret; +} + +static int __do_replace(struct net *net, const char *name, + unsigned int valid_hooks, + struct xt_table_info *newinfo, + unsigned int num_counters, + void __user *counters_ptr) +{ + int ret; + struct xt_table *t; + struct xt_table_info *oldinfo; + struct xt_counters *counters; + void *loc_cpu_old_entry; + struct arpt_entry *iter; + + ret = 0; + counters = xt_counters_alloc(num_counters); + if (!counters) { + ret = -ENOMEM; + goto out; + } + + t = xt_request_find_table_lock(net, NFPROTO_ARP, name); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto free_newinfo_counters_untrans; + } + + /* You lied! */ + if (valid_hooks != t->valid_hooks) { + ret = -EINVAL; + goto put_module; + } + + oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); + if (!oldinfo) + goto put_module; + + /* Update module usage count based on number of rules */ + if ((oldinfo->number > oldinfo->initial_entries) || + (newinfo->number <= oldinfo->initial_entries)) + module_put(t->me); + if ((oldinfo->number > oldinfo->initial_entries) && + (newinfo->number <= oldinfo->initial_entries)) + module_put(t->me); + + xt_table_unlock(t); + + get_old_counters(oldinfo, counters); + + /* Decrease module usage counts and free resource */ + loc_cpu_old_entry = oldinfo->entries; + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + cleanup_entry(iter, net); + + xt_free_table_info(oldinfo); + if (copy_to_user(counters_ptr, counters, + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n"); + } + vfree(counters); + return ret; + + put_module: + module_put(t->me); + xt_table_unlock(t); + free_newinfo_counters_untrans: + vfree(counters); + out: + return ret; +} + +static int do_replace(struct net *net, sockptr_t arg, unsigned int len) +{ + int ret; + struct arpt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct arpt_entry *iter; + + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + loc_cpu_entry = newinfo->entries; + if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); + if (ret != 0) + goto free_newinfo; + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, tmp.counters); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) +{ + unsigned int i; + struct xt_counters_info tmp; + struct xt_counters *paddc; + struct xt_table *t; + const struct xt_table_info *private; + int ret = 0; + struct arpt_entry *iter; + unsigned int addend; + + paddc = xt_copy_counters(arg, len, &tmp); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); + + t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto free; + } + + local_bh_disable(); + private = t->private; + if (private->number != tmp.num_counters) { + ret = -EINVAL; + goto unlock_up_free; + } + + i = 0; + + addend = xt_write_recseq_begin(); + xt_entry_foreach(iter, private->entries, private->size) { + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } + xt_write_recseq_end(addend); + unlock_up_free: + local_bh_enable(); + xt_table_unlock(t); + module_put(t->me); + free: + vfree(paddc); + + return ret; +} + +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT +struct compat_arpt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_ARP_NUMHOOKS]; + u32 underflow[NF_ARP_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; + struct compat_arpt_entry entries[]; +}; + +static inline void compat_release_entry(struct compat_arpt_entry *e) +{ + struct xt_entry_target *t; + + t = compat_arpt_get_target(e); + module_put(t->u.kernel.target->me); +} + +static int +check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, + struct xt_table_info *newinfo, + unsigned int *size, + const unsigned char *base, + const unsigned char *limit) +{ + struct xt_entry_target *t; + struct xt_target *target; + unsigned int entry_offset; + int ret, off; + + if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) + return -EINVAL; + + if (e->next_offset < sizeof(struct compat_arpt_entry) + + sizeof(struct compat_xt_entry_target)) + return -EINVAL; + + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); + if (ret) + return ret; + + off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + entry_offset = (void *)e - (void *)base; + + t = compat_arpt_get_target(e); + target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + ret = PTR_ERR(target); + goto out; + } + t->u.kernel.target = target; + + off += xt_compat_target_offset(target); + *size += off; + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); + if (ret) + goto release_target; + + return 0; + +release_target: + module_put(t->u.kernel.target->me); +out: + return ret; +} + +static void +compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, + unsigned int *size, + struct xt_table_info *newinfo, unsigned char *base) +{ + struct xt_entry_target *t; + struct arpt_entry *de; + unsigned int origsize; + int h; + + origsize = *size; + de = *dstptr; + memcpy(de, e, sizeof(struct arpt_entry)); + memcpy(&de->counters, &e->counters, sizeof(e->counters)); + + *dstptr += sizeof(struct arpt_entry); + *size += sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + + de->target_offset = e->target_offset - (origsize - *size); + t = compat_arpt_get_target(e); + xt_compat_target_from_user(t, dstptr, size); + + de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_ARP_NUMHOOKS; h++) { + if ((unsigned char *)de - base < newinfo->hook_entry[h]) + newinfo->hook_entry[h] -= origsize - *size; + if ((unsigned char *)de - base < newinfo->underflow[h]) + newinfo->underflow[h] -= origsize - *size; + } +} + +static int translate_compat_table(struct net *net, + struct xt_table_info **pinfo, + void **pentry0, + const struct compat_arpt_replace *compatr) +{ + unsigned int i, j; + struct xt_table_info *newinfo, *info; + void *pos, *entry0, *entry1; + struct compat_arpt_entry *iter0; + struct arpt_replace repl; + unsigned int size; + int ret; + + info = *pinfo; + entry0 = *pentry0; + size = compatr->size; + info->number = compatr->num_entries; + + j = 0; + xt_compat_lock(NFPROTO_ARP); + ret = xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); + if (ret) + goto out_unlock; + /* Walk through entries, checking offsets. */ + xt_entry_foreach(iter0, entry0, compatr->size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, + entry0 + compatr->size); + if (ret != 0) + goto out_unlock; + ++j; + } + + ret = -EINVAL; + if (j != compatr->num_entries) + goto out_unlock; + + ret = -ENOMEM; + newinfo = xt_alloc_table_info(size); + if (!newinfo) + goto out_unlock; + + memset(newinfo->entries, 0, size); + + newinfo->number = compatr->num_entries; + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; + } + entry1 = newinfo->entries; + pos = entry1; + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone */ + + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); + + memcpy(&repl, compatr, sizeof(*compatr)); + + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; + } + + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + + *pinfo = newinfo; + *pentry0 = entry1; + xt_free_table_info(info); + return 0; + +free_newinfo: + xt_free_table_info(newinfo); + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); + xt_entry_foreach(iter0, entry0, compatr->size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } + return ret; +} + +static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) +{ + int ret; + struct compat_arpt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct arpt_entry *iter; + + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + loc_cpu_entry = newinfo->entries; + if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); + if (ret != 0) + goto free_newinfo; + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, compat_ptr(tmp.counters)); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, + compat_uint_t *size, + struct xt_counters *counters, + unsigned int i) +{ + struct xt_entry_target *t; + struct compat_arpt_entry __user *ce; + u_int16_t target_offset, next_offset; + compat_uint_t origsize; + int ret; + + origsize = *size; + ce = *dstptr; + if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; + + *dstptr += sizeof(struct compat_arpt_entry); + *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + + target_offset = e->target_offset - (origsize - *size); + + t = arpt_get_target(e); + ret = xt_compat_target_to_user(t, dstptr, size); + if (ret) + return ret; + next_offset = e->next_offset - (origsize - *size); + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; + return 0; +} + +static int compat_copy_entries_to_user(unsigned int total_size, + struct xt_table *table, + void __user *userptr) +{ + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + void __user *pos; + unsigned int size; + int ret = 0; + unsigned int i = 0; + struct arpt_entry *iter; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + pos = userptr; + size = total_size; + xt_entry_foreach(iter, private->entries, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) + break; + } + vfree(counters); + return ret; +} + +struct compat_arpt_get_entries { + char name[XT_TABLE_MAXNAMELEN]; + compat_uint_t size; + struct compat_arpt_entry entrytable[]; +}; + +static int compat_get_entries(struct net *net, + struct compat_arpt_get_entries __user *uptr, + int *len) +{ + int ret; + struct compat_arpt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) + return -EINVAL; + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct compat_arpt_get_entries) + get.size) + return -EINVAL; + + get.name[sizeof(get.name) - 1] = '\0'; + + xt_compat_lock(NFPROTO_ARP); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); + if (!IS_ERR(t)) { + const struct xt_table_info *private = t->private; + struct xt_table_info info; + + ret = compat_table_info(private, &info); + if (!ret && get.size == info.size) { + ret = compat_copy_entries_to_user(private->size, + t, uptr->entrytable); + } else if (!ret) + ret = -EAGAIN; + + xt_compat_flush_offsets(NFPROTO_ARP); + module_put(t->me); + xt_table_unlock(t); + } else + ret = PTR_ERR(t); + + xt_compat_unlock(NFPROTO_ARP); + return ret; +} +#endif + +static int do_arpt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, + unsigned int len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case ARPT_SO_SET_REPLACE: +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + if (in_compat_syscall()) + ret = compat_do_replace(sock_net(sk), arg, len); + else +#endif + ret = do_replace(sock_net(sk), arg, len); + break; + + case ARPT_SO_SET_ADD_COUNTERS: + ret = do_add_counters(sock_net(sk), arg, len); + break; + + default: + ret = -EINVAL; + } + + return ret; +} + +static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case ARPT_SO_GET_INFO: + ret = get_info(sock_net(sk), user, len); + break; + + case ARPT_SO_GET_ENTRIES: +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + if (in_compat_syscall()) + ret = compat_get_entries(sock_net(sk), user, len); + else +#endif + ret = get_entries(sock_net(sk), user, len); + break; + + case ARPT_SO_GET_REVISION_TARGET: { + struct xt_get_revision rev; + + if (*len != sizeof(rev)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&rev, user, sizeof(rev)) != 0) { + ret = -EFAULT; + break; + } + rev.name[sizeof(rev.name)-1] = 0; + + try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, + rev.revision, 1, &ret), + "arpt_%s", rev.name); + break; + } + + default: + ret = -EINVAL; + } + + return ret; +} + +static void __arpt_unregister_table(struct net *net, struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct arpt_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); + if (private->number > private->initial_entries) + module_put(table_owner); + xt_free_table_info(private); +} + +int arpt_register_table(struct net *net, + const struct xt_table *table, + const struct arpt_replace *repl, + const struct nf_hook_ops *template_ops) +{ + struct nf_hook_ops *ops; + unsigned int num_ops; + int ret, i; + struct xt_table_info *newinfo; + struct xt_table_info bootstrap = {0}; + void *loc_cpu_entry; + struct xt_table *new_table; + + newinfo = xt_alloc_table_info(repl->size); + if (!newinfo) + return -ENOMEM; + + loc_cpu_entry = newinfo->entries; + memcpy(loc_cpu_entry, repl->entries, repl->size); + + ret = translate_table(net, newinfo, loc_cpu_entry, repl); + if (ret != 0) { + xt_free_table_info(newinfo); + return ret; + } + + new_table = xt_register_table(net, table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { + struct arpt_entry *iter; + + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); + xt_free_table_info(newinfo); + return PTR_ERR(new_table); + } + + num_ops = hweight32(table->valid_hooks); + if (num_ops == 0) { + ret = -EINVAL; + goto out_free; + } + + ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL); + if (!ops) { + ret = -ENOMEM; + goto out_free; + } + + for (i = 0; i < num_ops; i++) + ops[i].priv = new_table; + + new_table->ops = ops; + + ret = nf_register_net_hooks(net, ops, num_ops); + if (ret != 0) + goto out_free; + + return ret; + +out_free: + __arpt_unregister_table(net, new_table); + return ret; +} + +void arpt_unregister_table_pre_exit(struct net *net, const char *name) +{ + struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); + + if (table) + nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); +} +EXPORT_SYMBOL(arpt_unregister_table_pre_exit); + +void arpt_unregister_table(struct net *net, const char *name) +{ + struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); + + if (table) + __arpt_unregister_table(net, table); +} + +/* The built-in targets: standard (NULL) and error. */ +static struct xt_target arpt_builtin_tg[] __read_mostly = { + { + .name = XT_STANDARD_TARGET, + .targetsize = sizeof(int), + .family = NFPROTO_ARP, +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, +#endif + }, + { + .name = XT_ERROR_TARGET, + .target = arpt_error, + .targetsize = XT_FUNCTION_MAXNAMELEN, + .family = NFPROTO_ARP, + }, +}; + +static struct nf_sockopt_ops arpt_sockopts = { + .pf = PF_INET, + .set_optmin = ARPT_BASE_CTL, + .set_optmax = ARPT_SO_SET_MAX+1, + .set = do_arpt_set_ctl, + .get_optmin = ARPT_BASE_CTL, + .get_optmax = ARPT_SO_GET_MAX+1, + .get = do_arpt_get_ctl, + .owner = THIS_MODULE, +}; + +static int __net_init arp_tables_net_init(struct net *net) +{ + return xt_proto_init(net, NFPROTO_ARP); +} + +static void __net_exit arp_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, NFPROTO_ARP); +} + +static struct pernet_operations arp_tables_net_ops = { + .init = arp_tables_net_init, + .exit = arp_tables_net_exit, +}; + +static int __init arp_tables_init(void) +{ + int ret; + + ret = register_pernet_subsys(&arp_tables_net_ops); + if (ret < 0) + goto err1; + + /* No one else will be downing sem now, so we won't sleep */ + ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); + if (ret < 0) + goto err2; + + /* Register setsockopt */ + ret = nf_register_sockopt(&arpt_sockopts); + if (ret < 0) + goto err4; + + return 0; + +err4: + xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); +err2: + unregister_pernet_subsys(&arp_tables_net_ops); +err1: + return ret; +} + +static void __exit arp_tables_fini(void) +{ + nf_unregister_sockopt(&arpt_sockopts); + xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); + unregister_pernet_subsys(&arp_tables_net_ops); +} + +EXPORT_SYMBOL(arpt_register_table); +EXPORT_SYMBOL(arpt_unregister_table); +EXPORT_SYMBOL(arpt_do_table); + +module_init(arp_tables_init); +module_exit(arp_tables_fini); diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c new file mode 100644 index 0000000000..a4e07e5e9c --- /dev/null +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* module that allows mangling of the arp payload */ +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/netfilter_arp/arpt_mangle.h> +#include <net/sock.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); +MODULE_DESCRIPTION("arptables arp payload mangle target"); + +static unsigned int +target(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct arpt_mangle *mangle = par->targinfo; + const struct arphdr *arp; + unsigned char *arpptr; + int pln, hln; + + if (skb_ensure_writable(skb, skb->len)) + return NF_DROP; + + arp = arp_hdr(skb); + arpptr = skb_network_header(skb) + sizeof(*arp); + pln = arp->ar_pln; + hln = arp->ar_hln; + /* We assume that pln and hln were checked in the match */ + if (mangle->flags & ARPT_MANGLE_SDEV) { + if (ARPT_DEV_ADDR_LEN_MAX < hln || + (arpptr + hln > skb_tail_pointer(skb))) + return NF_DROP; + memcpy(arpptr, mangle->src_devaddr, hln); + } + arpptr += hln; + if (mangle->flags & ARPT_MANGLE_SIP) { + if (ARPT_MANGLE_ADDR_LEN_MAX < pln || + (arpptr + pln > skb_tail_pointer(skb))) + return NF_DROP; + memcpy(arpptr, &mangle->u_s.src_ip, pln); + } + arpptr += pln; + if (mangle->flags & ARPT_MANGLE_TDEV) { + if (ARPT_DEV_ADDR_LEN_MAX < hln || + (arpptr + hln > skb_tail_pointer(skb))) + return NF_DROP; + memcpy(arpptr, mangle->tgt_devaddr, hln); + } + arpptr += hln; + if (mangle->flags & ARPT_MANGLE_TIP) { + if (ARPT_MANGLE_ADDR_LEN_MAX < pln || + (arpptr + pln > skb_tail_pointer(skb))) + return NF_DROP; + memcpy(arpptr, &mangle->u_t.tgt_ip, pln); + } + return mangle->target; +} + +static int checkentry(const struct xt_tgchk_param *par) +{ + const struct arpt_mangle *mangle = par->targinfo; + + if (mangle->flags & ~ARPT_MANGLE_MASK || + !(mangle->flags & ARPT_MANGLE_MASK)) + return -EINVAL; + + if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && + mangle->target != XT_CONTINUE) + return -EINVAL; + return 0; +} + +static struct xt_target arpt_mangle_reg __read_mostly = { + .name = "mangle", + .family = NFPROTO_ARP, + .target = target, + .targetsize = sizeof(struct arpt_mangle), + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init arpt_mangle_init(void) +{ + return xt_register_target(&arpt_mangle_reg); +} + +static void __exit arpt_mangle_fini(void) +{ + xt_unregister_target(&arpt_mangle_reg); +} + +module_init(arpt_mangle_init); +module_exit(arpt_mangle_fini); diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c new file mode 100644 index 0000000000..78cd5ee244 --- /dev/null +++ b/net/ipv4/netfilter/arptable_filter.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Filtering ARP tables module. + * + * Copyright (C) 2002 David S. Miller (davem@redhat.com) + * + */ + +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_arp/arp_tables.h> +#include <linux/slab.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); +MODULE_DESCRIPTION("arptables filter table"); + +#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ + (1 << NF_ARP_FORWARD)) + +static const struct xt_table packet_filter = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_ARP, + .priority = NF_IP_PRI_FILTER, +}; + +static struct nf_hook_ops *arpfilter_ops __read_mostly; + +static int arptable_filter_table_init(struct net *net) +{ + struct arpt_replace *repl; + int err; + + repl = arpt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + err = arpt_register_table(net, &packet_filter, repl, arpfilter_ops); + kfree(repl); + return err; +} + +static void __net_exit arptable_filter_net_pre_exit(struct net *net) +{ + arpt_unregister_table_pre_exit(net, "filter"); +} + +static void __net_exit arptable_filter_net_exit(struct net *net) +{ + arpt_unregister_table(net, "filter"); +} + +static struct pernet_operations arptable_filter_net_ops = { + .exit = arptable_filter_net_exit, + .pre_exit = arptable_filter_net_pre_exit, +}; + +static int __init arptable_filter_init(void) +{ + int ret = xt_register_template(&packet_filter, + arptable_filter_table_init); + + if (ret < 0) + return ret; + + arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table); + if (IS_ERR(arpfilter_ops)) { + xt_unregister_template(&packet_filter); + return PTR_ERR(arpfilter_ops); + } + + ret = register_pernet_subsys(&arptable_filter_net_ops); + if (ret < 0) { + xt_unregister_template(&packet_filter); + kfree(arpfilter_ops); + return ret; + } + + return ret; +} + +static void __exit arptable_filter_fini(void) +{ + unregister_pernet_subsys(&arptable_filter_net_ops); + xt_unregister_template(&packet_filter); + kfree(arpfilter_ops); +} + +module_init(arptable_filter_init); +module_exit(arptable_filter_fini); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c new file mode 100644 index 0000000000..7da1df4997 --- /dev/null +++ b/net/ipv4/netfilter/ip_tables.c @@ -0,0 +1,1886 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Packet matching code. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/cache.h> +#include <linux/capability.h> +#include <linux/skbuff.h> +#include <linux/kmod.h> +#include <linux/vmalloc.h> +#include <linux/netdevice.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/compat.h> +#include <linux/uaccess.h> +#include <linux/mutex.h> +#include <linux/proc_fs.h> +#include <linux/err.h> +#include <linux/cpumask.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <net/netfilter/nf_log.h> +#include "../../netfilter/xt_repldata.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +MODULE_DESCRIPTION("IPv4 packet filter"); + +void *ipt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(ipt, IPT); +} +EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); + +/* Returns whether matches rule or not. */ +/* Performance critical - called for every packet */ +static inline bool +ip_packet_match(const struct iphdr *ip, + const char *indev, + const char *outdev, + const struct ipt_ip *ipinfo, + int isfrag) +{ + unsigned long ret; + + if (NF_INVF(ipinfo, IPT_INV_SRCIP, + (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) || + NF_INVF(ipinfo, IPT_INV_DSTIP, + (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr)) + return false; + + ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); + + if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0)) + return false; + + ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); + + if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0)) + return false; + + /* Check specific protocol */ + if (ipinfo->proto && + NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto)) + return false; + + /* If we have a fragment rule but the packet is not a fragment + * then we return zero */ + if (NF_INVF(ipinfo, IPT_INV_FRAG, + (ipinfo->flags & IPT_F_FRAG) && !isfrag)) + return false; + + return true; +} + +static bool +ip_checkentry(const struct ipt_ip *ip) +{ + if (ip->flags & ~IPT_F_MASK) + return false; + if (ip->invflags & ~IPT_INV_MASK) + return false; + return true; +} + +static unsigned int +ipt_error(struct sk_buff *skb, const struct xt_action_param *par) +{ + net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); + + return NF_DROP; +} + +/* Performance critical */ +static inline struct ipt_entry * +get_entry(const void *base, unsigned int offset) +{ + return (struct ipt_entry *)(base + offset); +} + +/* All zeroes == unconditional rule. */ +/* Mildly perf critical (only if packet tracing is on) */ +static inline bool unconditional(const struct ipt_entry *e) +{ + static const struct ipt_ip uncond; + + return e->target_offset == sizeof(struct ipt_entry) && + memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; +} + +/* for const-correctness */ +static inline const struct xt_entry_target * +ipt_get_target_c(const struct ipt_entry *e) +{ + return ipt_get_target((struct ipt_entry *)e); +} + +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) +static const char *const hooknames[] = { + [NF_INET_PRE_ROUTING] = "PREROUTING", + [NF_INET_LOCAL_IN] = "INPUT", + [NF_INET_FORWARD] = "FORWARD", + [NF_INET_LOCAL_OUT] = "OUTPUT", + [NF_INET_POST_ROUTING] = "POSTROUTING", +}; + +enum nf_ip_trace_comments { + NF_IP_TRACE_COMMENT_RULE, + NF_IP_TRACE_COMMENT_RETURN, + NF_IP_TRACE_COMMENT_POLICY, +}; + +static const char *const comments[] = { + [NF_IP_TRACE_COMMENT_RULE] = "rule", + [NF_IP_TRACE_COMMENT_RETURN] = "return", + [NF_IP_TRACE_COMMENT_POLICY] = "policy", +}; + +static const struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_DEFAULT_MASK, + }, + }, +}; + +/* Mildly perf critical (only if packet tracing is on) */ +static inline int +get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, + const char *hookname, const char **chainname, + const char **comment, unsigned int *rulenum) +{ + const struct xt_standard_target *t = (void *)ipt_get_target_c(s); + + if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) { + /* Head of user chain: ERROR target with chainname */ + *chainname = t->target.data; + (*rulenum) = 0; + } else if (s == e) { + (*rulenum)++; + + if (unconditional(s) && + strcmp(t->target.u.kernel.target->name, + XT_STANDARD_TARGET) == 0 && + t->verdict < 0) { + /* Tail of chains: STANDARD target (return/policy) */ + *comment = *chainname == hookname + ? comments[NF_IP_TRACE_COMMENT_POLICY] + : comments[NF_IP_TRACE_COMMENT_RETURN]; + } + return 1; + } else + (*rulenum)++; + + return 0; +} + +static void trace_packet(struct net *net, + const struct sk_buff *skb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + const char *tablename, + const struct xt_table_info *private, + const struct ipt_entry *e) +{ + const struct ipt_entry *root; + const char *hookname, *chainname, *comment; + const struct ipt_entry *iter; + unsigned int rulenum = 0; + + root = get_entry(private->entries, private->hook_entry[hook]); + + hookname = chainname = hooknames[hook]; + comment = comments[NF_IP_TRACE_COMMENT_RULE]; + + xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) + if (get_chainname_rulenum(iter, e, hookname, + &chainname, &comment, &rulenum) != 0) + break; + + nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); +} +#endif + +static inline +struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + +/* Returns one of the generic firewall policies, like NF_ACCEPT. */ +unsigned int +ipt_do_table(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct xt_table *table = priv; + unsigned int hook = state->hook; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); + const struct iphdr *ip; + /* Initializing verdict to NF_DROP keeps gcc happy. */ + unsigned int verdict = NF_DROP; + const char *indev, *outdev; + const void *table_base; + struct ipt_entry *e, **jumpstack; + unsigned int stackidx, cpu; + const struct xt_table_info *private; + struct xt_action_param acpar; + unsigned int addend; + + /* Initialization */ + stackidx = 0; + ip = ip_hdr(skb); + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; + /* We handle fragments by dealing with the first fragment as + * if it was a normal packet. All other fragments are treated + * normally, except that they will NEVER match rules that ask + * things we don't know, ie. tcp syn flag or ports). If the + * rule is also a fragment-specific rule, non-fragments won't + * match it. */ + acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; + acpar.thoff = ip_hdrlen(skb); + acpar.hotdrop = false; + acpar.state = state; + + WARN_ON(!(table->valid_hooks & (1 << hook))); + local_bh_disable(); + addend = xt_write_recseq_begin(); + private = READ_ONCE(table->private); /* Address dependency. */ + cpu = smp_processor_id(); + table_base = private->entries; + jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; + + /* Switch to alternate jumpstack if we're being invoked via TEE. + * TEE issues XT_CONTINUE verdict on original skb so we must not + * clobber the jumpstack. + * + * For recursion via REJECT or SYNPROXY the stack will be clobbered + * but it is no problem since absolute verdict is issued by these. + */ + if (static_key_false(&xt_tee_enabled)) + jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); + + e = get_entry(table_base, private->hook_entry[hook]); + + do { + const struct xt_entry_target *t; + const struct xt_entry_match *ematch; + struct xt_counters *counter; + + WARN_ON(!e); + if (!ip_packet_match(ip, indev, outdev, + &e->ip, acpar.fragoff)) { + no_match: + e = ipt_next_entry(e); + continue; + } + + xt_ematch_foreach(ematch, e) { + acpar.match = ematch->u.kernel.match; + acpar.matchinfo = ematch->data; + if (!acpar.match->match(skb, &acpar)) + goto no_match; + } + + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, skb->len, 1); + + t = ipt_get_target_c(e); + WARN_ON(!t->u.kernel.target); + +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) + /* The packet is traced: log it */ + if (unlikely(skb->nf_trace)) + trace_packet(state->net, skb, hook, state->in, + state->out, table->name, private, e); +#endif + /* Standard target? */ + if (!t->u.kernel.target->target) { + int v; + + v = ((struct xt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? */ + if (v != XT_RETURN) { + verdict = (unsigned int)(-v) - 1; + break; + } + if (stackidx == 0) { + e = get_entry(table_base, + private->underflow[hook]); + } else { + e = jumpstack[--stackidx]; + e = ipt_next_entry(e); + } + continue; + } + if (table_base + v != ipt_next_entry(e) && + !(e->ip.flags & IPT_F_GOTO)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } + jumpstack[stackidx++] = e; + } + + e = get_entry(table_base, v); + continue; + } + + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; + + verdict = t->u.kernel.target->target(skb, &acpar); + if (verdict == XT_CONTINUE) { + /* Target might have changed stuff. */ + ip = ip_hdr(skb); + e = ipt_next_entry(e); + } else { + /* Verdict */ + break; + } + } while (!acpar.hotdrop); + + xt_write_recseq_end(addend); + local_bh_enable(); + + if (acpar.hotdrop) + return NF_DROP; + else return verdict; +} + +/* Figures out from what hook each rule can be called: returns 0 if + there are loops. Puts hook bitmask in comefrom. */ +static int +mark_source_chains(const struct xt_table_info *newinfo, + unsigned int valid_hooks, void *entry0, + unsigned int *offsets) +{ + unsigned int hook; + + /* No recursion; use packet counter to save back ptrs (reset + to 0 as we leave), and comefrom to save source hook bitmask */ + for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { + unsigned int pos = newinfo->hook_entry[hook]; + struct ipt_entry *e = entry0 + pos; + + if (!(valid_hooks & (1 << hook))) + continue; + + /* Set initial back pointer. */ + e->counters.pcnt = pos; + + for (;;) { + const struct xt_standard_target *t + = (void *)ipt_get_target_c(e); + int visited = e->comefrom & (1 << hook); + + if (e->comefrom & (1 << NF_INET_NUMHOOKS)) + return 0; + + e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); + + /* Unconditional return/END. */ + if ((unconditional(e) && + (strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0) && + t->verdict < 0) || visited) { + unsigned int oldpos, size; + + /* Return: backtrack through the last + big jump. */ + do { + e->comefrom ^= (1<<NF_INET_NUMHOOKS); + oldpos = pos; + pos = e->counters.pcnt; + e->counters.pcnt = 0; + + /* We're at the start. */ + if (pos == oldpos) + goto next; + + e = entry0 + pos; + } while (oldpos == pos + e->next_offset); + + /* Move along one */ + size = e->next_offset; + e = entry0 + pos + size; + if (pos + size >= newinfo->size) + return 0; + e->counters.pcnt = pos; + pos += size; + } else { + int newpos = t->verdict; + + if (strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0 && + newpos >= 0) { + /* This a jump; chase it. */ + if (!xt_find_jump_offset(offsets, newpos, + newinfo->number)) + return 0; + } else { + /* ... this is a fallthru */ + newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; + } + e = entry0 + newpos; + e->counters.pcnt = pos; + pos = newpos; + } + } +next: ; + } + return 1; +} + +static void cleanup_match(struct xt_entry_match *m, struct net *net) +{ + struct xt_mtdtor_param par; + + par.net = net; + par.match = m->u.kernel.match; + par.matchinfo = m->data; + par.family = NFPROTO_IPV4; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); +} + +static int +check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) +{ + const struct ipt_ip *ip = par->entryinfo; + + par->match = m->u.kernel.match; + par->matchinfo = m->data; + + return xt_check_match(par, m->u.match_size - sizeof(*m), + ip->proto, ip->invflags & IPT_INV_PROTO); +} + +static int +find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) +{ + struct xt_match *match; + int ret; + + match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) + return PTR_ERR(match); + m->u.kernel.match = match; + + ret = check_match(m, par); + if (ret) + goto err; + + return 0; +err: + module_put(m->u.kernel.match->me); + return ret; +} + +static int check_target(struct ipt_entry *e, struct net *net, const char *name) +{ + struct xt_entry_target *t = ipt_get_target(e); + struct xt_tgchk_param par = { + .net = net, + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_IPV4, + }; + + return xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ip.proto, e->ip.invflags & IPT_INV_PROTO); +} + +static int +find_check_entry(struct ipt_entry *e, struct net *net, const char *name, + unsigned int size, + struct xt_percpu_counter_alloc_state *alloc_state) +{ + struct xt_entry_target *t; + struct xt_target *target; + int ret; + unsigned int j; + struct xt_mtchk_param mtpar; + struct xt_entry_match *ematch; + + if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) + return -ENOMEM; + + j = 0; + memset(&mtpar, 0, sizeof(mtpar)); + mtpar.net = net; + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + xt_ematch_foreach(ematch, e) { + ret = find_check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } + + t = ipt_get_target(e); + target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + ret = PTR_ERR(target); + goto cleanup_matches; + } + t->u.kernel.target = target; + + ret = check_target(e, net, name); + if (ret) + goto err; + + return 0; + err: + module_put(t->u.kernel.target->me); + cleanup_matches: + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } + + xt_percpu_counter_free(&e->counters); + + return ret; +} + +static bool check_underflow(const struct ipt_entry *e) +{ + const struct xt_entry_target *t; + unsigned int verdict; + + if (!unconditional(e)) + return false; + t = ipt_get_target_c(e); + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) + return false; + verdict = ((struct xt_standard_target *)t)->verdict; + verdict = -verdict - 1; + return verdict == NF_DROP || verdict == NF_ACCEPT; +} + +static int +check_entry_size_and_hooks(struct ipt_entry *e, + struct xt_table_info *newinfo, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, + unsigned int valid_hooks) +{ + unsigned int h; + int err; + + if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || + (unsigned char *)e + sizeof(struct ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) + return -EINVAL; + + if (e->next_offset + < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) + return -EINVAL; + + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); + if (err) + return err; + + /* Check hooks & underflows */ + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if (!(valid_hooks & (1 << h))) + continue; + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + if ((unsigned char *)e - base == underflows[h]) { + if (!check_underflow(e)) + return -EINVAL; + + newinfo->underflow[h] = underflows[h]; + } + } + + /* Clear counters and comefrom */ + e->counters = ((struct xt_counters) { 0, 0 }); + e->comefrom = 0; + return 0; +} + +static void +cleanup_entry(struct ipt_entry *e, struct net *net) +{ + struct xt_tgdtor_param par; + struct xt_entry_target *t; + struct xt_entry_match *ematch; + + /* Cleanup all matches */ + xt_ematch_foreach(ematch, e) + cleanup_match(ematch, net); + t = ipt_get_target(e); + + par.net = net; + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_IPV4; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); + xt_percpu_counter_free(&e->counters); +} + +/* Checks and translates the user-supplied table segment (held in + newinfo) */ +static int +translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, + const struct ipt_replace *repl) +{ + struct xt_percpu_counter_alloc_state alloc_state = { 0 }; + struct ipt_entry *iter; + unsigned int *offsets; + unsigned int i; + int ret = 0; + + newinfo->size = repl->size; + newinfo->number = repl->num_entries; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + newinfo->hook_entry[i] = 0xFFFFFFFF; + newinfo->underflow[i] = 0xFFFFFFFF; + } + + offsets = xt_alloc_entry_offsets(newinfo->number); + if (!offsets) + return -ENOMEM; + i = 0; + /* Walk through entries, checking offsets. */ + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); + if (ret != 0) + goto out_free; + if (i < repl->num_entries) + offsets[i] = (void *)iter - entry0; + ++i; + if (strcmp(ipt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } + + ret = -EINVAL; + if (i != repl->num_entries) + goto out_free; + + ret = xt_check_table_hooks(newinfo, repl->valid_hooks); + if (ret) + goto out_free; + + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { + ret = -ELOOP; + goto out_free; + } + kvfree(offsets); + + /* Finally, each sanity check must pass */ + i = 0; + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, net, repl->name, repl->size, + &alloc_state); + if (ret != 0) + break; + ++i; + } + + if (ret != 0) { + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter, net); + } + return ret; + } + + return ret; + out_free: + kvfree(offsets); + return ret; +} + +static void +get_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct ipt_entry *iter; + unsigned int cpu; + unsigned int i; + + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); + + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + struct xt_counters *tmp; + u64 bcnt, pcnt; + unsigned int start; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + do { + start = read_seqcount_begin(s); + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; + } while (read_seqcount_retry(s, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); + ++i; /* macro does multi eval of i */ + cond_resched(); + } + } +} + +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct ipt_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + const struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; /* macro does multi eval of i */ + } + + cond_resched(); + } +} + +static struct xt_counters *alloc_counters(const struct xt_table *table) +{ + unsigned int countersize; + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + + /* We need atomic snapshot of counters: rest doesn't change + (other than comefrom, which userspace doesn't care + about). */ + countersize = sizeof(struct xt_counters) * private->number; + counters = vzalloc(countersize); + + if (counters == NULL) + return ERR_PTR(-ENOMEM); + + get_counters(private, counters); + + return counters; +} + +static int +copy_entries_to_user(unsigned int total_size, + const struct xt_table *table, + void __user *userptr) +{ + unsigned int off, num; + const struct ipt_entry *e; + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + int ret = 0; + const void *loc_cpu_entry; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + loc_cpu_entry = private->entries; + + /* FIXME: use iterator macros --RR */ + /* ... then go back and fix counters and names */ + for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ + unsigned int i; + const struct xt_entry_match *m; + const struct xt_entry_target *t; + + e = loc_cpu_entry + off; + if (copy_to_user(userptr + off, e, sizeof(*e))) { + ret = -EFAULT; + goto free_counters; + } + if (copy_to_user(userptr + off + + offsetof(struct ipt_entry, counters), + &counters[num], + sizeof(counters[num])) != 0) { + ret = -EFAULT; + goto free_counters; + } + + for (i = sizeof(struct ipt_entry); + i < e->target_offset; + i += m->u.match_size) { + m = (void *)e + i; + + if (xt_match_to_user(m, userptr + off + i)) { + ret = -EFAULT; + goto free_counters; + } + } + + t = ipt_get_target_c(e); + if (xt_target_to_user(t, userptr + off + e->target_offset)) { + ret = -EFAULT; + goto free_counters; + } + } + + free_counters: + vfree(counters); + return ret; +} + +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT +static void compat_standard_from_user(void *dst, const void *src) +{ + int v = *(compat_int_t *)src; + + if (v > 0) + v += xt_compat_calc_jump(AF_INET, v); + memcpy(dst, &v, sizeof(v)); +} + +static int compat_standard_to_user(void __user *dst, const void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv > 0) + cv -= xt_compat_calc_jump(AF_INET, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; +} + +static int compat_calc_entry(const struct ipt_entry *e, + const struct xt_table_info *info, + const void *base, struct xt_table_info *newinfo) +{ + const struct xt_entry_match *ematch; + const struct xt_entry_target *t; + unsigned int entry_offset; + int off, i, ret; + + off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + entry_offset = (void *)e - base; + xt_ematch_foreach(ematch, e) + off += xt_compat_match_offset(ematch->u.kernel.match); + t = ipt_get_target_c(e); + off += xt_compat_target_offset(t->u.kernel.target); + newinfo->size -= off; + ret = xt_compat_add_offset(AF_INET, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + if (info->hook_entry[i] && + (e < (struct ipt_entry *)(base + info->hook_entry[i]))) + newinfo->hook_entry[i] -= off; + if (info->underflow[i] && + (e < (struct ipt_entry *)(base + info->underflow[i]))) + newinfo->underflow[i] -= off; + } + return 0; +} + +static int compat_table_info(const struct xt_table_info *info, + struct xt_table_info *newinfo) +{ + struct ipt_entry *iter; + const void *loc_cpu_entry; + int ret; + + if (!newinfo || !info) + return -EINVAL; + + /* we dont care about newinfo->entries */ + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + newinfo->initial_entries = 0; + loc_cpu_entry = info->entries; + ret = xt_compat_init_offsets(AF_INET, info->number); + if (ret) + return ret; + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; +} +#endif + +static int get_info(struct net *net, void __user *user, const int *len) +{ + char name[XT_TABLE_MAXNAMELEN]; + struct xt_table *t; + int ret; + + if (*len != sizeof(struct ipt_getinfo)) + return -EINVAL; + + if (copy_from_user(name, user, sizeof(name)) != 0) + return -EFAULT; + + name[XT_TABLE_MAXNAMELEN-1] = '\0'; +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + if (in_compat_syscall()) + xt_compat_lock(AF_INET); +#endif + t = xt_request_find_table_lock(net, AF_INET, name); + if (!IS_ERR(t)) { + struct ipt_getinfo info; + const struct xt_table_info *private = t->private; +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + struct xt_table_info tmp; + + if (in_compat_syscall()) { + ret = compat_table_info(private, &tmp); + xt_compat_flush_offsets(AF_INET); + private = &tmp; + } +#endif + memset(&info, 0, sizeof(info)); + info.valid_hooks = t->valid_hooks; + memcpy(info.hook_entry, private->hook_entry, + sizeof(info.hook_entry)); + memcpy(info.underflow, private->underflow, + sizeof(info.underflow)); + info.num_entries = private->number; + info.size = private->size; + strcpy(info.name, name); + + if (copy_to_user(user, &info, *len) != 0) + ret = -EFAULT; + else + ret = 0; + + xt_table_unlock(t); + module_put(t->me); + } else + ret = PTR_ERR(t); +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + if (in_compat_syscall()) + xt_compat_unlock(AF_INET); +#endif + return ret; +} + +static int +get_entries(struct net *net, struct ipt_get_entries __user *uptr, + const int *len) +{ + int ret; + struct ipt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) + return -EINVAL; + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct ipt_get_entries) + get.size) + return -EINVAL; + get.name[sizeof(get.name) - 1] = '\0'; + + t = xt_find_table_lock(net, AF_INET, get.name); + if (!IS_ERR(t)) { + const struct xt_table_info *private = t->private; + if (get.size == private->size) + ret = copy_entries_to_user(private->size, + t, uptr->entrytable); + else + ret = -EAGAIN; + + module_put(t->me); + xt_table_unlock(t); + } else + ret = PTR_ERR(t); + + return ret; +} + +static int +__do_replace(struct net *net, const char *name, unsigned int valid_hooks, + struct xt_table_info *newinfo, unsigned int num_counters, + void __user *counters_ptr) +{ + int ret; + struct xt_table *t; + struct xt_table_info *oldinfo; + struct xt_counters *counters; + struct ipt_entry *iter; + + counters = xt_counters_alloc(num_counters); + if (!counters) { + ret = -ENOMEM; + goto out; + } + + t = xt_request_find_table_lock(net, AF_INET, name); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto free_newinfo_counters_untrans; + } + + /* You lied! */ + if (valid_hooks != t->valid_hooks) { + ret = -EINVAL; + goto put_module; + } + + oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); + if (!oldinfo) + goto put_module; + + /* Update module usage count based on number of rules */ + if ((oldinfo->number > oldinfo->initial_entries) || + (newinfo->number <= oldinfo->initial_entries)) + module_put(t->me); + if ((oldinfo->number > oldinfo->initial_entries) && + (newinfo->number <= oldinfo->initial_entries)) + module_put(t->me); + + xt_table_unlock(t); + + get_old_counters(oldinfo, counters); + + /* Decrease module usage counts and free resource */ + xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) + cleanup_entry(iter, net); + + xt_free_table_info(oldinfo); + if (copy_to_user(counters_ptr, counters, + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n"); + } + vfree(counters); + return 0; + + put_module: + module_put(t->me); + xt_table_unlock(t); + free_newinfo_counters_untrans: + vfree(counters); + out: + return ret; +} + +static int +do_replace(struct net *net, sockptr_t arg, unsigned int len) +{ + int ret; + struct ipt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct ipt_entry *iter; + + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + loc_cpu_entry = newinfo->entries; + if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); + if (ret != 0) + goto free_newinfo; + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, tmp.counters); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +static int +do_add_counters(struct net *net, sockptr_t arg, unsigned int len) +{ + unsigned int i; + struct xt_counters_info tmp; + struct xt_counters *paddc; + struct xt_table *t; + const struct xt_table_info *private; + int ret = 0; + struct ipt_entry *iter; + unsigned int addend; + + paddc = xt_copy_counters(arg, len, &tmp); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); + + t = xt_find_table_lock(net, AF_INET, tmp.name); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto free; + } + + local_bh_disable(); + private = t->private; + if (private->number != tmp.num_counters) { + ret = -EINVAL; + goto unlock_up_free; + } + + i = 0; + addend = xt_write_recseq_begin(); + xt_entry_foreach(iter, private->entries, private->size) { + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } + xt_write_recseq_end(addend); + unlock_up_free: + local_bh_enable(); + xt_table_unlock(t); + module_put(t->me); + free: + vfree(paddc); + + return ret; +} + +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT +struct compat_ipt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_INET_NUMHOOKS]; + u32 underflow[NF_INET_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; /* struct xt_counters * */ + struct compat_ipt_entry entries[]; +}; + +static int +compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, + unsigned int *size, struct xt_counters *counters, + unsigned int i) +{ + struct xt_entry_target *t; + struct compat_ipt_entry __user *ce; + u_int16_t target_offset, next_offset; + compat_uint_t origsize; + const struct xt_entry_match *ematch; + int ret = 0; + + origsize = *size; + ce = *dstptr; + if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; + + *dstptr += sizeof(struct compat_ipt_entry); + *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_to_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } + target_offset = e->target_offset - (origsize - *size); + t = ipt_get_target(e); + ret = xt_compat_target_to_user(t, dstptr, size); + if (ret) + return ret; + next_offset = e->next_offset - (origsize - *size); + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; + return 0; +} + +static int +compat_find_calc_match(struct xt_entry_match *m, + const struct ipt_ip *ip, + int *size) +{ + struct xt_match *match; + + match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) + return PTR_ERR(match); + + m->u.kernel.match = match; + *size += xt_compat_match_offset(match); + return 0; +} + +static void compat_release_entry(struct compat_ipt_entry *e) +{ + struct xt_entry_target *t; + struct xt_entry_match *ematch; + + /* Cleanup all matches */ + xt_ematch_foreach(ematch, e) + module_put(ematch->u.kernel.match->me); + t = compat_ipt_get_target(e); + module_put(t->u.kernel.target->me); +} + +static int +check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, + struct xt_table_info *newinfo, + unsigned int *size, + const unsigned char *base, + const unsigned char *limit) +{ + struct xt_entry_match *ematch; + struct xt_entry_target *t; + struct xt_target *target; + unsigned int entry_offset; + unsigned int j; + int ret, off; + + if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) + return -EINVAL; + + if (e->next_offset < sizeof(struct compat_ipt_entry) + + sizeof(struct compat_xt_entry_target)) + return -EINVAL; + + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, + e->target_offset, e->next_offset); + if (ret) + return ret; + + off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + entry_offset = (void *)e - (void *)base; + j = 0; + xt_ematch_foreach(ematch, e) { + ret = compat_find_calc_match(ematch, &e->ip, &off); + if (ret != 0) + goto release_matches; + ++j; + } + + t = compat_ipt_get_target(e); + target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + ret = PTR_ERR(target); + goto release_matches; + } + t->u.kernel.target = target; + + off += xt_compat_target_offset(target); + *size += off; + ret = xt_compat_add_offset(AF_INET, entry_offset, off); + if (ret) + goto out; + + return 0; + +out: + module_put(t->u.kernel.target->me); +release_matches: + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + module_put(ematch->u.kernel.match->me); + } + return ret; +} + +static void +compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, + unsigned int *size, + struct xt_table_info *newinfo, unsigned char *base) +{ + struct xt_entry_target *t; + struct ipt_entry *de; + unsigned int origsize; + int h; + struct xt_entry_match *ematch; + + origsize = *size; + de = *dstptr; + memcpy(de, e, sizeof(struct ipt_entry)); + memcpy(&de->counters, &e->counters, sizeof(e->counters)); + + *dstptr += sizeof(struct ipt_entry); + *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + + de->target_offset = e->target_offset - (origsize - *size); + t = compat_ipt_get_target(e); + xt_compat_target_from_user(t, dstptr, size); + + de->next_offset = e->next_offset - (origsize - *size); + + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if ((unsigned char *)de - base < newinfo->hook_entry[h]) + newinfo->hook_entry[h] -= origsize - *size; + if ((unsigned char *)de - base < newinfo->underflow[h]) + newinfo->underflow[h] -= origsize - *size; + } +} + +static int +translate_compat_table(struct net *net, + struct xt_table_info **pinfo, + void **pentry0, + const struct compat_ipt_replace *compatr) +{ + unsigned int i, j; + struct xt_table_info *newinfo, *info; + void *pos, *entry0, *entry1; + struct compat_ipt_entry *iter0; + struct ipt_replace repl; + unsigned int size; + int ret; + + info = *pinfo; + entry0 = *pentry0; + size = compatr->size; + info->number = compatr->num_entries; + + j = 0; + xt_compat_lock(AF_INET); + ret = xt_compat_init_offsets(AF_INET, compatr->num_entries); + if (ret) + goto out_unlock; + /* Walk through entries, checking offsets. */ + xt_entry_foreach(iter0, entry0, compatr->size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, + entry0 + compatr->size); + if (ret != 0) + goto out_unlock; + ++j; + } + + ret = -EINVAL; + if (j != compatr->num_entries) + goto out_unlock; + + ret = -ENOMEM; + newinfo = xt_alloc_table_info(size); + if (!newinfo) + goto out_unlock; + + memset(newinfo->entries, 0, size); + + newinfo->number = compatr->num_entries; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; + } + entry1 = newinfo->entries; + pos = entry1; + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone. + * entry1/newinfo contains a 64bit ruleset that looks exactly as + * generated by 64bit userspace. + * + * Call standard translate_table() to validate all hook_entrys, + * underflows, check for loops, etc. + */ + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); + + memcpy(&repl, compatr, sizeof(*compatr)); + + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; + } + + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + + *pinfo = newinfo; + *pentry0 = entry1; + xt_free_table_info(info); + return 0; + +free_newinfo: + xt_free_table_info(newinfo); + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); + xt_entry_foreach(iter0, entry0, compatr->size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } + return ret; +} + +static int +compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) +{ + int ret; + struct compat_ipt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct ipt_entry *iter; + + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + loc_cpu_entry = newinfo->entries; + if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); + if (ret != 0) + goto free_newinfo; + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, compat_ptr(tmp.counters)); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +struct compat_ipt_get_entries { + char name[XT_TABLE_MAXNAMELEN]; + compat_uint_t size; + struct compat_ipt_entry entrytable[]; +}; + +static int +compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, + void __user *userptr) +{ + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + void __user *pos; + unsigned int size; + int ret = 0; + unsigned int i = 0; + struct ipt_entry *iter; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + pos = userptr; + size = total_size; + xt_entry_foreach(iter, private->entries, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) + break; + } + + vfree(counters); + return ret; +} + +static int +compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, + int *len) +{ + int ret; + struct compat_ipt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) + return -EINVAL; + + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + + if (*len != sizeof(struct compat_ipt_get_entries) + get.size) + return -EINVAL; + + get.name[sizeof(get.name) - 1] = '\0'; + + xt_compat_lock(AF_INET); + t = xt_find_table_lock(net, AF_INET, get.name); + if (!IS_ERR(t)) { + const struct xt_table_info *private = t->private; + struct xt_table_info info; + ret = compat_table_info(private, &info); + if (!ret && get.size == info.size) + ret = compat_copy_entries_to_user(private->size, + t, uptr->entrytable); + else if (!ret) + ret = -EAGAIN; + + xt_compat_flush_offsets(AF_INET); + module_put(t->me); + xt_table_unlock(t); + } else + ret = PTR_ERR(t); + + xt_compat_unlock(AF_INET); + return ret; +} +#endif + +static int +do_ipt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_SET_REPLACE: +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + if (in_compat_syscall()) + ret = compat_do_replace(sock_net(sk), arg, len); + else +#endif + ret = do_replace(sock_net(sk), arg, len); + break; + + case IPT_SO_SET_ADD_COUNTERS: + ret = do_add_counters(sock_net(sk), arg, len); + break; + + default: + ret = -EINVAL; + } + + return ret; +} + +static int +do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_GET_INFO: + ret = get_info(sock_net(sk), user, len); + break; + + case IPT_SO_GET_ENTRIES: +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + if (in_compat_syscall()) + ret = compat_get_entries(sock_net(sk), user, len); + else +#endif + ret = get_entries(sock_net(sk), user, len); + break; + + case IPT_SO_GET_REVISION_MATCH: + case IPT_SO_GET_REVISION_TARGET: { + struct xt_get_revision rev; + int target; + + if (*len != sizeof(rev)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&rev, user, sizeof(rev)) != 0) { + ret = -EFAULT; + break; + } + rev.name[sizeof(rev.name)-1] = 0; + + if (cmd == IPT_SO_GET_REVISION_TARGET) + target = 1; + else + target = 0; + + try_then_request_module(xt_find_revision(AF_INET, rev.name, + rev.revision, + target, &ret), + "ipt_%s", rev.name); + break; + } + + default: + ret = -EINVAL; + } + + return ret; +} + +static void __ipt_unregister_table(struct net *net, struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct ipt_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); + if (private->number > private->initial_entries) + module_put(table_owner); + xt_free_table_info(private); +} + +int ipt_register_table(struct net *net, const struct xt_table *table, + const struct ipt_replace *repl, + const struct nf_hook_ops *template_ops) +{ + struct nf_hook_ops *ops; + unsigned int num_ops; + int ret, i; + struct xt_table_info *newinfo; + struct xt_table_info bootstrap = {0}; + void *loc_cpu_entry; + struct xt_table *new_table; + + newinfo = xt_alloc_table_info(repl->size); + if (!newinfo) + return -ENOMEM; + + loc_cpu_entry = newinfo->entries; + memcpy(loc_cpu_entry, repl->entries, repl->size); + + ret = translate_table(net, newinfo, loc_cpu_entry, repl); + if (ret != 0) { + xt_free_table_info(newinfo); + return ret; + } + + new_table = xt_register_table(net, table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { + struct ipt_entry *iter; + + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); + xt_free_table_info(newinfo); + return PTR_ERR(new_table); + } + + /* No template? No need to do anything. This is used by 'nat' table, it registers + * with the nat core instead of the netfilter core. + */ + if (!template_ops) + return 0; + + num_ops = hweight32(table->valid_hooks); + if (num_ops == 0) { + ret = -EINVAL; + goto out_free; + } + + ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL); + if (!ops) { + ret = -ENOMEM; + goto out_free; + } + + for (i = 0; i < num_ops; i++) + ops[i].priv = new_table; + + new_table->ops = ops; + + ret = nf_register_net_hooks(net, ops, num_ops); + if (ret != 0) + goto out_free; + + return ret; + +out_free: + __ipt_unregister_table(net, new_table); + return ret; +} + +void ipt_unregister_table_pre_exit(struct net *net, const char *name) +{ + struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); + + if (table) + nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); +} + +void ipt_unregister_table_exit(struct net *net, const char *name) +{ + struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); + + if (table) + __ipt_unregister_table(net, table); +} + +static struct xt_target ipt_builtin_tg[] __read_mostly = { + { + .name = XT_STANDARD_TARGET, + .targetsize = sizeof(int), + .family = NFPROTO_IPV4, +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, +#endif + }, + { + .name = XT_ERROR_TARGET, + .target = ipt_error, + .targetsize = XT_FUNCTION_MAXNAMELEN, + .family = NFPROTO_IPV4, + }, +}; + +static struct nf_sockopt_ops ipt_sockopts = { + .pf = PF_INET, + .set_optmin = IPT_BASE_CTL, + .set_optmax = IPT_SO_SET_MAX+1, + .set = do_ipt_set_ctl, + .get_optmin = IPT_BASE_CTL, + .get_optmax = IPT_SO_GET_MAX+1, + .get = do_ipt_get_ctl, + .owner = THIS_MODULE, +}; + +static int __net_init ip_tables_net_init(struct net *net) +{ + return xt_proto_init(net, NFPROTO_IPV4); +} + +static void __net_exit ip_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, NFPROTO_IPV4); +} + +static struct pernet_operations ip_tables_net_ops = { + .init = ip_tables_net_init, + .exit = ip_tables_net_exit, +}; + +static int __init ip_tables_init(void) +{ + int ret; + + ret = register_pernet_subsys(&ip_tables_net_ops); + if (ret < 0) + goto err1; + + /* No one else will be downing sem now, so we won't sleep */ + ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); + if (ret < 0) + goto err2; + + /* Register setsockopt */ + ret = nf_register_sockopt(&ipt_sockopts); + if (ret < 0) + goto err4; + + return 0; + +err4: + xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); +err2: + unregister_pernet_subsys(&ip_tables_net_ops); +err1: + return ret; +} + +static void __exit ip_tables_fini(void) +{ + nf_unregister_sockopt(&ipt_sockopts); + + xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); + unregister_pernet_subsys(&ip_tables_net_ops); +} + +EXPORT_SYMBOL(ipt_register_table); +EXPORT_SYMBOL(ipt_unregister_table_pre_exit); +EXPORT_SYMBOL(ipt_unregister_table_exit); +EXPORT_SYMBOL(ipt_do_table); +module_init(ip_tables_init); +module_exit(ip_tables_fini); diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c new file mode 100644 index 0000000000..5930d3b025 --- /dev/null +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* iptables module for the IPv4 and TCP ECN bits, Version 1.5 + * + * (C) 2002 by Harald Welte <laforge@netfilter.org> +*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/in.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <linux/tcp.h> +#include <net/checksum.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_ECN.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); +MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag modification"); + +/* set ECT codepoint from IP header. + * return false if there was an error. */ +static inline bool +set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo) +{ + struct iphdr *iph = ip_hdr(skb); + + if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { + __u8 oldtos; + if (skb_ensure_writable(skb, sizeof(struct iphdr))) + return false; + iph = ip_hdr(skb); + oldtos = iph->tos; + iph->tos &= ~IPT_ECN_IP_MASK; + iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); + csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); + } + return true; +} + +/* Return false if there was an error. */ +static inline bool +set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) +{ + struct tcphdr _tcph, *tcph; + __be16 oldval; + + /* Not enough header? */ + tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); + if (!tcph) + return false; + + if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) || + tcph->ece == einfo->proto.tcp.ece) && + (!(einfo->operation & IPT_ECN_OP_SET_CWR) || + tcph->cwr == einfo->proto.tcp.cwr)) + return true; + + if (skb_ensure_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) + return false; + tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb); + + oldval = ((__be16 *)tcph)[6]; + if (einfo->operation & IPT_ECN_OP_SET_ECE) + tcph->ece = einfo->proto.tcp.ece; + if (einfo->operation & IPT_ECN_OP_SET_CWR) + tcph->cwr = einfo->proto.tcp.cwr; + + inet_proto_csum_replace2(&tcph->check, skb, + oldval, ((__be16 *)tcph)[6], false); + return true; +} + +static unsigned int +ecn_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ipt_ECN_info *einfo = par->targinfo; + + if (einfo->operation & IPT_ECN_OP_SET_IP) + if (!set_ect_ip(skb, einfo)) + return NF_DROP; + + if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && + ip_hdr(skb)->protocol == IPPROTO_TCP) + if (!set_ect_tcp(skb, einfo)) + return NF_DROP; + + return XT_CONTINUE; +} + +static int ecn_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_ECN_info *einfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; + + if (einfo->operation & IPT_ECN_OP_MASK) + return -EINVAL; + + if (einfo->ip_ect & ~IPT_ECN_IP_MASK) + return -EINVAL; + + if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && + (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { + pr_info_ratelimited("cannot use operation on non-tcp rule\n"); + return -EINVAL; + } + return 0; +} + +static struct xt_target ecn_tg_reg __read_mostly = { + .name = "ECN", + .family = NFPROTO_IPV4, + .target = ecn_tg, + .targetsize = sizeof(struct ipt_ECN_info), + .table = "mangle", + .checkentry = ecn_tg_check, + .me = THIS_MODULE, +}; + +static int __init ecn_tg_init(void) +{ + return xt_register_target(&ecn_tg_reg); +} + +static void __exit ecn_tg_exit(void) +{ + xt_unregister_target(&ecn_tg_reg); +} + +module_init(ecn_tg_init); +module_exit(ecn_tg_exit); diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c new file mode 100644 index 0000000000..4b88407347 --- /dev/null +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This is a module which is used for rejecting packets. + */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/icmp.h> +#include <net/icmp.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_REJECT.h> +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +#include <linux/netfilter_bridge.h> +#endif + +#include <net/netfilter/ipv4/nf_reject.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4"); + +static unsigned int +reject_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ipt_reject_info *reject = par->targinfo; + int hook = xt_hooknum(par); + + switch (reject->with) { + case IPT_ICMP_NET_UNREACHABLE: + nf_send_unreach(skb, ICMP_NET_UNREACH, hook); + break; + case IPT_ICMP_HOST_UNREACHABLE: + nf_send_unreach(skb, ICMP_HOST_UNREACH, hook); + break; + case IPT_ICMP_PROT_UNREACHABLE: + nf_send_unreach(skb, ICMP_PROT_UNREACH, hook); + break; + case IPT_ICMP_PORT_UNREACHABLE: + nf_send_unreach(skb, ICMP_PORT_UNREACH, hook); + break; + case IPT_ICMP_NET_PROHIBITED: + nf_send_unreach(skb, ICMP_NET_ANO, hook); + break; + case IPT_ICMP_HOST_PROHIBITED: + nf_send_unreach(skb, ICMP_HOST_ANO, hook); + break; + case IPT_ICMP_ADMIN_PROHIBITED: + nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); + break; + case IPT_TCP_RESET: + nf_send_reset(xt_net(par), par->state->sk, skb, hook); + break; + case IPT_ICMP_ECHOREPLY: + /* Doesn't happen. */ + break; + } + + return NF_DROP; +} + +static int reject_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_reject_info *rejinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; + + if (rejinfo->with == IPT_ICMP_ECHOREPLY) { + pr_info_ratelimited("ECHOREPLY no longer supported.\n"); + return -EINVAL; + } else if (rejinfo->with == IPT_TCP_RESET) { + /* Must specify that it's a TCP packet */ + if (e->ip.proto != IPPROTO_TCP || + (e->ip.invflags & XT_INV_PROTO)) { + pr_info_ratelimited("TCP_RESET invalid for non-tcp\n"); + return -EINVAL; + } + } + return 0; +} + +static struct xt_target reject_tg_reg __read_mostly = { + .name = "REJECT", + .family = NFPROTO_IPV4, + .target = reject_tg, + .targetsize = sizeof(struct ipt_reject_info), + .table = "filter", + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT), + .checkentry = reject_tg_check, + .me = THIS_MODULE, +}; + +static int __init reject_tg_init(void) +{ + return xt_register_target(&reject_tg_reg); +} + +static void __exit reject_tg_exit(void) +{ + xt_unregister_target(&reject_tg_reg); +} + +module_init(reject_tg_init); +module_exit(reject_tg_exit); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c new file mode 100644 index 0000000000..f2984c7eef --- /dev/null +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> + */ + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_SYNPROXY.h> + +#include <net/netfilter/nf_synproxy.h> + +static unsigned int +synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_synproxy_info *info = par->targinfo; + struct net *net = xt_net(par); + struct synproxy_net *snet = synproxy_pernet(net); + struct synproxy_options opts = {}; + struct tcphdr *th, _th; + + if (nf_ip_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP)) + return NF_DROP; + + th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); + if (th == NULL) + return NF_DROP; + + if (!synproxy_parse_options(skb, par->thoff, th, &opts)) + return NF_DROP; + + if (th->syn && !(th->ack || th->fin || th->rst)) { + /* Initial SYN from client */ + this_cpu_inc(snet->stats->syn_received); + + if (th->ece && th->cwr) + opts.options |= XT_SYNPROXY_OPT_ECN; + + opts.options &= info->options; + opts.mss_encode = opts.mss_option; + opts.mss_option = info->mss; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy_init_timestamp_cookie(info, &opts); + else + opts.options &= ~(XT_SYNPROXY_OPT_WSCALE | + XT_SYNPROXY_OPT_SACK_PERM | + XT_SYNPROXY_OPT_ECN); + + synproxy_send_client_synack(net, skb, th, &opts); + consume_skb(skb); + return NF_STOLEN; + } else if (th->ack && !(th->fin || th->rst || th->syn)) { + /* ACK from client */ + if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq))) { + consume_skb(skb); + return NF_STOLEN; + } else { + return NF_DROP; + } + } + + return XT_CONTINUE; +} + +static int synproxy_tg4_check(const struct xt_tgchk_param *par) +{ + struct synproxy_net *snet = synproxy_pernet(par->net); + const struct ipt_entry *e = par->entryinfo; + int err; + + if (e->ip.proto != IPPROTO_TCP || + e->ip.invflags & XT_INV_PROTO) + return -EINVAL; + + err = nf_ct_netns_get(par->net, par->family); + if (err) + return err; + + err = nf_synproxy_ipv4_init(snet, par->net); + if (err) { + nf_ct_netns_put(par->net, par->family); + return err; + } + + return err; +} + +static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) +{ + struct synproxy_net *snet = synproxy_pernet(par->net); + + nf_synproxy_ipv4_fini(snet, par->net); + nf_ct_netns_put(par->net, par->family); +} + +static struct xt_target synproxy_tg4_reg __read_mostly = { + .name = "SYNPROXY", + .family = NFPROTO_IPV4, + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), + .target = synproxy_tg4, + .targetsize = sizeof(struct xt_synproxy_info), + .checkentry = synproxy_tg4_check, + .destroy = synproxy_tg4_destroy, + .me = THIS_MODULE, +}; + +static int __init synproxy_tg4_init(void) +{ + return xt_register_target(&synproxy_tg4_reg); +} + +static void __exit synproxy_tg4_exit(void) +{ + xt_unregister_target(&synproxy_tg4_reg); +} + +module_init(synproxy_tg4_init); +module_exit(synproxy_tg4_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("Intercept TCP connections and establish them using syncookies"); diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c new file mode 100644 index 0000000000..161ba412cb --- /dev/null +++ b/net/ipv4/netfilter/ipt_ah.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Kernel module to match AH parameters. */ +/* (C) 1999-2000 Yon Uriarte <yon@astaro.de> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/in.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> + +#include <linux/netfilter_ipv4/ipt_ah.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); +MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match"); + +/* Returns 1 if the spi is matched by the range, 0 otherwise */ +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) +{ + bool r; + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' : ' ', min, spi, max); + r = (spi >= min && spi <= max) ^ invert; + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); + return r; +} + +static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + const struct ipt_ah *ahinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr); + if (ah == NULL) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + pr_debug("Dropping evil AH tinygram.\n"); + par->hotdrop = true; + return false; + } + + return spi_match(ahinfo->spis[0], ahinfo->spis[1], + ntohl(ah->spi), + !!(ahinfo->invflags & IPT_AH_INV_SPI)); +} + +static int ah_mt_check(const struct xt_mtchk_param *par) +{ + const struct ipt_ah *ahinfo = par->matchinfo; + + /* Must specify no unknown invflags */ + if (ahinfo->invflags & ~IPT_AH_INV_MASK) { + pr_debug("unknown flags %X\n", ahinfo->invflags); + return -EINVAL; + } + return 0; +} + +static struct xt_match ah_mt_reg __read_mostly = { + .name = "ah", + .family = NFPROTO_IPV4, + .match = ah_mt, + .matchsize = sizeof(struct ipt_ah), + .proto = IPPROTO_AH, + .checkentry = ah_mt_check, + .me = THIS_MODULE, +}; + +static int __init ah_mt_init(void) +{ + return xt_register_match(&ah_mt_reg); +} + +static void __exit ah_mt_exit(void) +{ + xt_unregister_match(&ah_mt_reg); +} + +module_init(ah_mt_init); +module_exit(ah_mt_exit); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c new file mode 100644 index 0000000000..ded5bef02f --- /dev/null +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2011 Florian Westphal <fw@strlen.de> + * + * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <net/ip_fib.h> +#include <net/route.h> + +#include <linux/netfilter/xt_rpfilter.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match"); + +/* don't try to find route from mcast/bcast/zeronet */ +static __be32 rpfilter_get_saddr(__be32 addr) +{ + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || + ipv4_is_zeronet(addr)) + return 0; + return addr; +} + +static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4, + const struct net_device *dev, u8 flags) +{ + struct fib_result res; + + if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) + return false; + + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL)) + return false; + } + return fib_info_nh_uses_dev(res.fi, dev) || flags & XT_RPFILTER_LOOSE; +} + +static bool +rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in) +{ + return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; +} + +static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_rpfilter_info *info; + const struct iphdr *iph; + struct flowi4 flow; + bool invert; + + info = par->matchinfo; + invert = info->flags & XT_RPFILTER_INVERT; + + if (rpfilter_is_loopback(skb, xt_in(par))) + return true ^ invert; + + iph = ip_hdr(skb); + if (ipv4_is_zeronet(iph->saddr)) { + if (ipv4_is_lbcast(iph->daddr) || + ipv4_is_local_multicast(iph->daddr)) + return true ^ invert; + } + + memset(&flow, 0, sizeof(flow)); + flow.flowi4_iif = LOOPBACK_IFINDEX; + flow.daddr = iph->saddr; + flow.saddr = rpfilter_get_saddr(iph->daddr); + flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; + flow.flowi4_tos = iph->tos & IPTOS_RT_MASK; + flow.flowi4_scope = RT_SCOPE_UNIVERSE; + flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par)); + flow.flowi4_uid = sock_net_uid(xt_net(par), NULL); + + return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; +} + +static int rpfilter_check(const struct xt_mtchk_param *par) +{ + const struct xt_rpfilter_info *info = par->matchinfo; + unsigned int options = ~XT_RPFILTER_OPTION_MASK; + if (info->flags & options) { + pr_info_ratelimited("unknown options\n"); + return -EINVAL; + } + + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "raw") != 0) { + pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n", + par->table); + return -EINVAL; + } + + return 0; +} + +static struct xt_match rpfilter_mt_reg __read_mostly = { + .name = "rpfilter", + .family = NFPROTO_IPV4, + .checkentry = rpfilter_check, + .match = rpfilter_mt, + .matchsize = sizeof(struct xt_rpfilter_info), + .hooks = (1 << NF_INET_PRE_ROUTING), + .me = THIS_MODULE +}; + +static int __init rpfilter_mt_init(void) +{ + return xt_register_match(&rpfilter_mt_reg); +} + +static void __exit rpfilter_mt_exit(void) +{ + xt_unregister_match(&rpfilter_mt_reg); +} + +module_init(rpfilter_mt_init); +module_exit(rpfilter_mt_exit); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c new file mode 100644 index 0000000000..b9062f4552 --- /dev/null +++ b/net/ipv4/netfilter/iptable_filter.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/slab.h> +#include <net/ip.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +MODULE_DESCRIPTION("iptables filter table"); + +#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT)) + +static const struct xt_table packet_filter = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_FILTER, +}; + +static struct nf_hook_ops *filter_ops __read_mostly; + +/* Default to forward because I got too much mail already. */ +static bool forward __read_mostly = true; +module_param(forward, bool, 0000); + +static int iptable_filter_table_init(struct net *net) +{ + struct ipt_replace *repl; + int err; + + repl = ipt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + /* Entry 1 is the FORWARD hook */ + ((struct ipt_standard *)repl->entries)[1].target.verdict = + forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; + + err = ipt_register_table(net, &packet_filter, repl, filter_ops); + kfree(repl); + return err; +} + +static int __net_init iptable_filter_net_init(struct net *net) +{ + if (!forward) + return iptable_filter_table_init(net); + + return 0; +} + +static void __net_exit iptable_filter_net_pre_exit(struct net *net) +{ + ipt_unregister_table_pre_exit(net, "filter"); +} + +static void __net_exit iptable_filter_net_exit(struct net *net) +{ + ipt_unregister_table_exit(net, "filter"); +} + +static struct pernet_operations iptable_filter_net_ops = { + .init = iptable_filter_net_init, + .pre_exit = iptable_filter_net_pre_exit, + .exit = iptable_filter_net_exit, +}; + +static int __init iptable_filter_init(void) +{ + int ret = xt_register_template(&packet_filter, + iptable_filter_table_init); + + if (ret < 0) + return ret; + + filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table); + if (IS_ERR(filter_ops)) { + xt_unregister_template(&packet_filter); + return PTR_ERR(filter_ops); + } + + ret = register_pernet_subsys(&iptable_filter_net_ops); + if (ret < 0) { + xt_unregister_template(&packet_filter); + kfree(filter_ops); + return ret; + } + + return 0; +} + +static void __exit iptable_filter_fini(void) +{ + unregister_pernet_subsys(&iptable_filter_net_ops); + xt_unregister_template(&packet_filter); + kfree(filter_ops); +} + +module_init(iptable_filter_init); +module_exit(iptable_filter_fini); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c new file mode 100644 index 0000000000..3abb430af9 --- /dev/null +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> + */ +#include <linux/module.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <net/sock.h> +#include <net/route.h> +#include <linux/ip.h> +#include <net/ip.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +MODULE_DESCRIPTION("iptables mangle table"); + +#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ + (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) | \ + (1 << NF_INET_POST_ROUTING)) + +static const struct xt_table packet_mangler = { + .name = "mangle", + .valid_hooks = MANGLE_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_MANGLE, +}; + +static unsigned int +ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) +{ + unsigned int ret; + const struct iphdr *iph; + u_int8_t tos; + __be32 saddr, daddr; + u_int32_t mark; + int err; + + /* Save things which could affect route */ + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = ipt_do_table(priv, skb, state); + /* Reroute for ANY change. */ + if (ret != NF_DROP && ret != NF_STOLEN) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) { + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } + + return ret; +} + +/* The work comes in here from netfilter.c. */ +static unsigned int +iptable_mangle_hook(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + if (state->hook == NF_INET_LOCAL_OUT) + return ipt_mangle_out(priv, skb, state); + return ipt_do_table(priv, skb, state); +} + +static struct nf_hook_ops *mangle_ops __read_mostly; +static int iptable_mangle_table_init(struct net *net) +{ + struct ipt_replace *repl; + int ret; + + repl = ipt_alloc_initial_table(&packet_mangler); + if (repl == NULL) + return -ENOMEM; + ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops); + kfree(repl); + return ret; +} + +static void __net_exit iptable_mangle_net_pre_exit(struct net *net) +{ + ipt_unregister_table_pre_exit(net, "mangle"); +} + +static void __net_exit iptable_mangle_net_exit(struct net *net) +{ + ipt_unregister_table_exit(net, "mangle"); +} + +static struct pernet_operations iptable_mangle_net_ops = { + .pre_exit = iptable_mangle_net_pre_exit, + .exit = iptable_mangle_net_exit, +}; + +static int __init iptable_mangle_init(void) +{ + int ret = xt_register_template(&packet_mangler, + iptable_mangle_table_init); + if (ret < 0) + return ret; + + mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); + if (IS_ERR(mangle_ops)) { + xt_unregister_template(&packet_mangler); + ret = PTR_ERR(mangle_ops); + return ret; + } + + ret = register_pernet_subsys(&iptable_mangle_net_ops); + if (ret < 0) { + xt_unregister_template(&packet_mangler); + kfree(mangle_ops); + return ret; + } + + return ret; +} + +static void __exit iptable_mangle_fini(void) +{ + unregister_pernet_subsys(&iptable_mangle_net_ops); + xt_unregister_template(&packet_mangler); + kfree(mangle_ops); +} + +module_init(iptable_mangle_init); +module_exit(iptable_mangle_fini); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c new file mode 100644 index 0000000000..56f6ecc434 --- /dev/null +++ b/net/ipv4/netfilter/iptable_nat.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2011 Patrick McHardy <kaber@trash.net> + */ + +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/ip.h> +#include <net/ip.h> + +#include <net/netfilter/nf_nat.h> + +struct iptable_nat_pernet { + struct nf_hook_ops *nf_nat_ops; +}; + +static unsigned int iptable_nat_net_id __read_mostly; + +static const struct xt_table nf_nat_ipv4_table = { + .name = "nat", + .valid_hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + .af = NFPROTO_IPV4, +}; + +static const struct nf_hook_ops nf_nat_ipv4_ops[] = { + { + .hook = ipt_do_table, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_NAT_DST, + }, + { + .hook = ipt_do_table, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC, + }, + { + .hook = ipt_do_table, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_NAT_DST, + }, + { + .hook = ipt_do_table, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SRC, + }, +}; + +static int ipt_nat_register_lookups(struct net *net) +{ + struct iptable_nat_pernet *xt_nat_net; + struct nf_hook_ops *ops; + struct xt_table *table; + int i, ret; + + xt_nat_net = net_generic(net, iptable_nat_net_id); + table = xt_find_table(net, NFPROTO_IPV4, "nat"); + if (WARN_ON_ONCE(!table)) + return -ENOENT; + + ops = kmemdup(nf_nat_ipv4_ops, sizeof(nf_nat_ipv4_ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++) { + ops[i].priv = table; + ret = nf_nat_ipv4_register_fn(net, &ops[i]); + if (ret) { + while (i) + nf_nat_ipv4_unregister_fn(net, &ops[--i]); + + kfree(ops); + return ret; + } + } + + xt_nat_net->nf_nat_ops = ops; + return 0; +} + +static void ipt_nat_unregister_lookups(struct net *net) +{ + struct iptable_nat_pernet *xt_nat_net = net_generic(net, iptable_nat_net_id); + struct nf_hook_ops *ops = xt_nat_net->nf_nat_ops; + int i; + + if (!ops) + return; + + for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++) + nf_nat_ipv4_unregister_fn(net, &ops[i]); + + kfree(ops); +} + +static int iptable_nat_table_init(struct net *net) +{ + struct ipt_replace *repl; + int ret; + + repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); + if (repl == NULL) + return -ENOMEM; + + ret = ipt_register_table(net, &nf_nat_ipv4_table, repl, NULL); + if (ret < 0) { + kfree(repl); + return ret; + } + + ret = ipt_nat_register_lookups(net); + if (ret < 0) + ipt_unregister_table_exit(net, "nat"); + + kfree(repl); + return ret; +} + +static void __net_exit iptable_nat_net_pre_exit(struct net *net) +{ + ipt_nat_unregister_lookups(net); +} + +static void __net_exit iptable_nat_net_exit(struct net *net) +{ + ipt_unregister_table_exit(net, "nat"); +} + +static struct pernet_operations iptable_nat_net_ops = { + .pre_exit = iptable_nat_net_pre_exit, + .exit = iptable_nat_net_exit, + .id = &iptable_nat_net_id, + .size = sizeof(struct iptable_nat_pernet), +}; + +static int __init iptable_nat_init(void) +{ + int ret = xt_register_template(&nf_nat_ipv4_table, + iptable_nat_table_init); + + if (ret < 0) + return ret; + + ret = register_pernet_subsys(&iptable_nat_net_ops); + if (ret < 0) { + xt_unregister_template(&nf_nat_ipv4_table); + return ret; + } + + return ret; +} + +static void __exit iptable_nat_exit(void) +{ + unregister_pernet_subsys(&iptable_nat_net_ops); + xt_unregister_template(&nf_nat_ipv4_table); +} + +module_init(iptable_nat_init); +module_exit(iptable_nat_exit); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c new file mode 100644 index 0000000000..ca5e5b2158 --- /dev/null +++ b/net/ipv4/netfilter/iptable_raw.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT . + * + * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@netfilter.org> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/slab.h> +#include <net/ip.h> + +#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) + +static bool raw_before_defrag __read_mostly; +MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag"); +module_param(raw_before_defrag, bool, 0000); + +static const struct xt_table packet_raw = { + .name = "raw", + .valid_hooks = RAW_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_RAW, +}; + +static const struct xt_table packet_raw_before_defrag = { + .name = "raw", + .valid_hooks = RAW_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG, +}; + +static struct nf_hook_ops *rawtable_ops __read_mostly; + +static int iptable_raw_table_init(struct net *net) +{ + struct ipt_replace *repl; + const struct xt_table *table = &packet_raw; + int ret; + + if (raw_before_defrag) + table = &packet_raw_before_defrag; + + repl = ipt_alloc_initial_table(table); + if (repl == NULL) + return -ENOMEM; + ret = ipt_register_table(net, table, repl, rawtable_ops); + kfree(repl); + return ret; +} + +static void __net_exit iptable_raw_net_pre_exit(struct net *net) +{ + ipt_unregister_table_pre_exit(net, "raw"); +} + +static void __net_exit iptable_raw_net_exit(struct net *net) +{ + ipt_unregister_table_exit(net, "raw"); +} + +static struct pernet_operations iptable_raw_net_ops = { + .pre_exit = iptable_raw_net_pre_exit, + .exit = iptable_raw_net_exit, +}; + +static int __init iptable_raw_init(void) +{ + int ret; + const struct xt_table *table = &packet_raw; + + if (raw_before_defrag) { + table = &packet_raw_before_defrag; + + pr_info("Enabling raw table before defrag\n"); + } + + ret = xt_register_template(table, + iptable_raw_table_init); + if (ret < 0) + return ret; + + rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table); + if (IS_ERR(rawtable_ops)) { + xt_unregister_template(table); + return PTR_ERR(rawtable_ops); + } + + ret = register_pernet_subsys(&iptable_raw_net_ops); + if (ret < 0) { + xt_unregister_template(table); + kfree(rawtable_ops); + return ret; + } + + return ret; +} + +static void __exit iptable_raw_fini(void) +{ + unregister_pernet_subsys(&iptable_raw_net_ops); + kfree(rawtable_ops); + xt_unregister_template(&packet_raw); +} + +module_init(iptable_raw_init); +module_exit(iptable_raw_fini); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c new file mode 100644 index 0000000000..d885443cb2 --- /dev/null +++ b/net/ipv4/netfilter/iptable_security.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * "security" table + * + * This is for use by Mandatory Access Control (MAC) security models, + * which need to be able to manage security policy in separate context + * to DAC. + * + * Based on iptable_mangle.c + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> + * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> + */ +#include <linux/module.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/slab.h> +#include <net/ip.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); +MODULE_DESCRIPTION("iptables security table, for MAC rules"); + +#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) + +static const struct xt_table security_table = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_SECURITY, +}; + +static struct nf_hook_ops *sectbl_ops __read_mostly; + +static int iptable_security_table_init(struct net *net) +{ + struct ipt_replace *repl; + int ret; + + repl = ipt_alloc_initial_table(&security_table); + if (repl == NULL) + return -ENOMEM; + ret = ipt_register_table(net, &security_table, repl, sectbl_ops); + kfree(repl); + return ret; +} + +static void __net_exit iptable_security_net_pre_exit(struct net *net) +{ + ipt_unregister_table_pre_exit(net, "security"); +} + +static void __net_exit iptable_security_net_exit(struct net *net) +{ + ipt_unregister_table_exit(net, "security"); +} + +static struct pernet_operations iptable_security_net_ops = { + .pre_exit = iptable_security_net_pre_exit, + .exit = iptable_security_net_exit, +}; + +static int __init iptable_security_init(void) +{ + int ret = xt_register_template(&security_table, + iptable_security_table_init); + + if (ret < 0) + return ret; + + sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table); + if (IS_ERR(sectbl_ops)) { + xt_unregister_template(&security_table); + return PTR_ERR(sectbl_ops); + } + + ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) { + xt_unregister_template(&security_table); + kfree(sectbl_ops); + return ret; + } + + return ret; +} + +static void __exit iptable_security_fini(void) +{ + unregister_pernet_subsys(&iptable_security_net_ops); + kfree(sectbl_ops); + xt_unregister_template(&security_table); +} + +module_init(iptable_security_init); +module_exit(iptable_security_fini); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 0000000000..265b39bc43 --- /dev/null +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + */ + +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/rcupdate.h> +#include <linux/skbuff.h> +#include <net/netns/generic.h> +#include <net/route.h> +#include <net/ip.h> + +#include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif +#include <net/netfilter/nf_conntrack_zones.h> + +static DEFINE_MUTEX(defrag4_mutex); + +static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, + u_int32_t user) +{ + int err; + + local_bh_disable(); + err = ip_defrag(net, skb, user); + local_bh_enable(); + + if (!err) + skb->ignore_df = 1; + + return err; +} + +static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, + struct sk_buff *skb) +{ + u16 zone_id = NF_CT_DEFAULT_ZONE_ID; +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + if (skb_nfct(skb)) { + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); + } +#endif + if (nf_bridge_in_prerouting(skb)) + return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id; + + if (hooknum == NF_INET_PRE_ROUTING) + return IP_DEFRAG_CONNTRACK_IN + zone_id; + else + return IP_DEFRAG_CONNTRACK_OUT + zone_id; +} + +static unsigned int ipv4_conntrack_defrag(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct sock *sk = skb->sk; + + if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) && + inet_test_bit(NODEFRAG, sk)) + return NF_ACCEPT; + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#if !IS_ENABLED(CONFIG_NF_NAT) + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) + return NF_ACCEPT; +#endif + if (skb->_nfct == IP_CT_UNTRACKED) + return NF_ACCEPT; +#endif + /* Gather fragments. */ + if (ip_is_fragment(ip_hdr(skb))) { + enum ip_defrag_users user = + nf_ct_defrag_user(state->hook, skb); + + if (nf_ct_ipv4_gather_frags(state->net, skb, user)) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static const struct nf_hook_ops ipv4_defrag_ops[] = { + { + .hook = ipv4_conntrack_defrag, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv4_conntrack_defrag, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, +}; + +static void __net_exit defrag4_net_exit(struct net *net) +{ + if (net->nf.defrag_ipv4_users) { + nf_unregister_net_hooks(net, ipv4_defrag_ops, + ARRAY_SIZE(ipv4_defrag_ops)); + net->nf.defrag_ipv4_users = 0; + } +} + +static const struct nf_defrag_hook defrag_hook = { + .owner = THIS_MODULE, + .enable = nf_defrag_ipv4_enable, + .disable = nf_defrag_ipv4_disable, +}; + +static struct pernet_operations defrag4_net_ops = { + .exit = defrag4_net_exit, +}; + +static int __init nf_defrag_init(void) +{ + int err; + + err = register_pernet_subsys(&defrag4_net_ops); + if (err) + return err; + + rcu_assign_pointer(nf_defrag_v4_hook, &defrag_hook); + return err; +} + +static void __exit nf_defrag_fini(void) +{ + rcu_assign_pointer(nf_defrag_v4_hook, NULL); + unregister_pernet_subsys(&defrag4_net_ops); +} + +int nf_defrag_ipv4_enable(struct net *net) +{ + int err = 0; + + mutex_lock(&defrag4_mutex); + if (net->nf.defrag_ipv4_users == UINT_MAX) { + err = -EOVERFLOW; + goto out_unlock; + } + + if (net->nf.defrag_ipv4_users) { + net->nf.defrag_ipv4_users++; + goto out_unlock; + } + + err = nf_register_net_hooks(net, ipv4_defrag_ops, + ARRAY_SIZE(ipv4_defrag_ops)); + if (err == 0) + net->nf.defrag_ipv4_users = 1; + + out_unlock: + mutex_unlock(&defrag4_mutex); + return err; +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); + +void nf_defrag_ipv4_disable(struct net *net) +{ + mutex_lock(&defrag4_mutex); + if (net->nf.defrag_ipv4_users) { + net->nf.defrag_ipv4_users--; + if (net->nf.defrag_ipv4_users == 0) + nf_unregister_net_hooks(net, ipv4_defrag_ops, + ARRAY_SIZE(ipv4_defrag_ops)); + } + + mutex_unlock(&defrag4_mutex); +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_disable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c new file mode 100644 index 0000000000..6cc5743c55 --- /dev/null +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag> + * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de> + * + * Extracted from xt_TEE.c + */ +#include <linux/ip.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/route.h> +#include <linux/skbuff.h> +#include <linux/netfilter.h> +#include <net/checksum.h> +#include <net/icmp.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/netfilter/ipv4/nf_dup_ipv4.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + +static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb, + const struct in_addr *gw, int oif) +{ + const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt; + struct flowi4 fl4; + + memset(&fl4, 0, sizeof(fl4)); + if (oif != -1) + fl4.flowi4_oif = oif; + + fl4.daddr = gw->s_addr; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return false; + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + skb->dev = rt->dst.dev; + skb->protocol = htons(ETH_P_IP); + + return true; +} + +void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, + const struct in_addr *gw, int oif) +{ + struct iphdr *iph; + + if (this_cpu_read(nf_skb_duplicated)) + return; + /* + * Copy the skb, and route the copy. Will later return %XT_CONTINUE for + * the original skb, which should continue on its way as if nothing has + * happened. The copy should be independently delivered to the gateway. + */ + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return; + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + /* Avoid counting cloned packets towards the original connection. */ + nf_reset_ct(skb); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); +#endif + /* + * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential + * loops between two hosts. + * + * Set %IP_DF so that the original source is notified of a potentially + * decreased MTU on the clone route. IPv6 does this too. + * + * IP header checksum will be recalculated at ip_local_out. + */ + iph = ip_hdr(skb); + iph->frag_off |= htons(IP_DF); + if (hooknum == NF_INET_PRE_ROUTING || + hooknum == NF_INET_LOCAL_IN) + --iph->ttl; + + if (nf_dup_ipv4_route(net, skb, gw, oif)) { + __this_cpu_write(nf_skb_duplicated, true); + ip_local_out(net, skb->sk, skb); + __this_cpu_write(nf_skb_duplicated, false); + } else { + kfree_skb(skb); + } +} +EXPORT_SYMBOL_GPL(nf_dup_ipv4); + +MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); +MODULE_DESCRIPTION("nf_dup_ipv4: Duplicate IPv4 packet"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c new file mode 100644 index 0000000000..faee20af48 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -0,0 +1,567 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * H.323 extension for NAT alteration. + * + * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> + * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net> + * + * Based on the 'brute force' H.323 NAT module by + * Jozsef Kadlecsik <kadlec@netfilter.org> + */ + +#include <linux/module.h> +#include <linux/tcp.h> +#include <net/tcp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_h323.h> + +/****************************************************************************/ +static int set_addr(struct sk_buff *skb, unsigned int protoff, + unsigned char **data, int dataoff, + unsigned int addroff, __be32 ip, __be16 port) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct { + __be32 ip; + __be16 port; + } __attribute__ ((__packed__)) buf; + const struct tcphdr *th; + struct tcphdr _tcph; + + buf.ip = ip; + buf.port = port; + addroff += dataoff; + + if (ip_hdr(skb)->protocol == IPPROTO_TCP) { + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + protoff, addroff, sizeof(buf), + (char *) &buf, sizeof(buf))) { + net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n"); + return -1; + } + + /* Relocate data pointer */ + th = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(_tcph), &_tcph); + if (th == NULL) + return -1; + *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; + } else { + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, + protoff, addroff, sizeof(buf), + (char *) &buf, sizeof(buf))) { + net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); + return -1; + } + /* nf_nat_mangle_udp_packet uses skb_ensure_writable() to copy + * or pull everything in a linear buffer, so we can safely + * use the skb pointers now */ + *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); + } + + return 0; +} + +/****************************************************************************/ +static int set_h225_addr(struct sk_buff *skb, unsigned int protoff, + unsigned char **data, int dataoff, + TransportAddress *taddr, + union nf_inet_addr *addr, __be16 port) +{ + return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip, + addr->ip, port); +} + +/****************************************************************************/ +static int set_h245_addr(struct sk_buff *skb, unsigned protoff, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + union nf_inet_addr *addr, __be16 port) +{ + return set_addr(skb, protoff, data, dataoff, + taddr->unicastAddress.iPAddress.network, + addr->ip, port); +} + +/****************************************************************************/ +static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int count) +{ + const struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + int i; + __be16 port; + union nf_inet_addr addr; + + for (i = 0; i < count; i++) { + if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) { + if (addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && + port == info->sig_port[dir]) { + /* GW->GK */ + + /* Fix for Gnomemeeting */ + if (i > 0 && + get_h225_addr(ct, *data, &taddr[0], + &addr, &port) && + (ntohl(addr.ip) & 0xff000000) == 0x7f000000) + i = 0; + + pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", + &addr.ip, port, + &ct->tuplehash[!dir].tuple.dst.u3.ip, + info->sig_port[!dir]); + return set_h225_addr(skb, protoff, data, 0, + &taddr[i], + &ct->tuplehash[!dir]. + tuple.dst.u3, + info->sig_port[!dir]); + } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && + port == info->sig_port[dir]) { + /* GK->GW */ + pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", + &addr.ip, port, + &ct->tuplehash[!dir].tuple.src.u3.ip, + info->sig_port[!dir]); + return set_h225_addr(skb, protoff, data, 0, + &taddr[i], + &ct->tuplehash[!dir]. + tuple.src.u3, + info->sig_port[!dir]); + } + } + } + + return 0; +} + +/****************************************************************************/ +static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int count) +{ + int dir = CTINFO2DIR(ctinfo); + int i; + __be16 port; + union nf_inet_addr addr; + + for (i = 0; i < count; i++) { + if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && + addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && + port == ct->tuplehash[dir].tuple.src.u.udp.port) { + pr_debug("nf_nat_ras: set rasAddress %pI4:%hu->%pI4:%hu\n", + &addr.ip, ntohs(port), + &ct->tuplehash[!dir].tuple.dst.u3.ip, + ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); + return set_h225_addr(skb, protoff, data, 0, &taddr[i], + &ct->tuplehash[!dir].tuple.dst.u3, + ct->tuplehash[!dir].tuple. + dst.u.udp.port); + } + } + + return 0; +} + +/****************************************************************************/ +static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + __be16 port, __be16 rtp_port, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp) +{ + struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + int i; + u_int16_t nated_port; + + /* Set expectations for NAT */ + rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; + rtp_exp->expectfn = nf_nat_follow_master; + rtp_exp->dir = !dir; + rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; + rtcp_exp->expectfn = nf_nat_follow_master; + rtcp_exp->dir = !dir; + + /* Lookup existing expects */ + for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) { + if (info->rtp_port[i][dir] == rtp_port) { + /* Expected */ + + /* Use allocated ports first. This will refresh + * the expects */ + rtp_exp->tuple.dst.u.udp.port = info->rtp_port[i][dir]; + rtcp_exp->tuple.dst.u.udp.port = + htons(ntohs(info->rtp_port[i][dir]) + 1); + break; + } else if (info->rtp_port[i][dir] == 0) { + /* Not expected */ + break; + } + } + + /* Run out of expectations */ + if (i >= H323_RTP_CHANNEL_MAX) { + net_notice_ratelimited("nf_nat_h323: out of expectations\n"); + return 0; + } + + /* Try to get a pair of ports. */ + for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port); + nated_port != 0; nated_port += 2) { + int ret; + + rtp_exp->tuple.dst.u.udp.port = htons(nated_port); + ret = nf_ct_expect_related(rtp_exp, 0); + if (ret == 0) { + rtcp_exp->tuple.dst.u.udp.port = + htons(nated_port + 1); + ret = nf_ct_expect_related(rtcp_exp, 0); + if (ret == 0) + break; + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { + nf_ct_unexpect_related(rtp_exp); + nated_port = 0; + break; + } + } else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_h323: out of RTP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h245_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons((port & htons(1)) ? nated_port + 1 : + nated_port))) { + nf_ct_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtcp_exp); + return -1; + } + + /* Save ports */ + info->rtp_port[i][dir] = rtp_port; + info->rtp_port[i][!dir] = htons(nated_port); + + /* Success */ + pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n", + &rtp_exp->tuple.src.u3.ip, + ntohs(rtp_exp->tuple.src.u.udp.port), + &rtp_exp->tuple.dst.u3.ip, + ntohs(rtp_exp->tuple.dst.u.udp.port)); + pr_debug("nf_nat_h323: expect RTCP %pI4:%hu->%pI4:%hu\n", + &rtcp_exp->tuple.src.u3.ip, + ntohs(rtcp_exp->tuple.src.u.udp.port), + &rtcp_exp->tuple.dst.u3.ip, + ntohs(rtcp_exp->tuple.dst.u.udp.port)); + + return 0; +} + +/****************************************************************************/ +static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, + H245_TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = nf_nat_follow_master; + exp->dir = !dir; + + nated_port = nf_nat_exp_find_port(exp, nated_port); + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_h323: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h245_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) < 0) { + nf_ct_unexpect_related(exp); + return -1; + } + + pr_debug("nf_nat_h323: expect T.120 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/****************************************************************************/ +static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = nf_nat_follow_master; + exp->dir = !dir; + + /* Check existing expects */ + if (info->sig_port[dir] == port) + nated_port = ntohs(info->sig_port[!dir]); + + nated_port = nf_nat_exp_find_port(exp, nated_port); + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h225_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port))) { + nf_ct_unexpect_related(exp); + return -1; + } + + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + + pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/**************************************************************************** + * This conntrack expect function replaces nf_conntrack_q931_expect() + * which was set by nf_conntrack_h323.c. + ****************************************************************************/ +static void ip_nat_q931_expect(struct nf_conn *new, + struct nf_conntrack_expect *this) +{ + struct nf_nat_range2 range; + + if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ + nf_nat_follow_master(new, this); + return; + } + + /* This must be a fresh one. */ + BUG_ON(new->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = + new->master->tuplehash[!this->dir].tuple.src.u3; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); +} + +/****************************************************************************/ +static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int idx, + __be16 port, struct nf_conntrack_expect *exp) +{ + struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + union nf_inet_addr addr; + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = ip_nat_q931_expect; + exp->dir = !dir; + + /* Check existing expects */ + if (info->sig_port[dir] == port) + nated_port = ntohs(info->sig_port[!dir]); + + nated_port = nf_nat_exp_find_port(exp, nated_port); + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_ras: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h225_addr(skb, protoff, data, 0, &taddr[idx], + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port))) { + nf_ct_unexpect_related(exp); + return -1; + } + + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + + /* Fix for Gnomemeeting */ + if (idx > 0 && + get_h225_addr(ct, *data, &taddr[0], &addr, &port) && + (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { + if (set_h225_addr(skb, protoff, data, 0, &taddr[0], + &ct->tuplehash[!dir].tuple.dst.u3, + info->sig_port[!dir])) { + nf_ct_unexpect_related(exp); + return -1; + } + } + + /* Success */ + pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/****************************************************************************/ +static void ip_nat_callforwarding_expect(struct nf_conn *new, + struct nf_conntrack_expect *this) +{ + struct nf_nat_range2 range; + + /* This must be a fresh one. */ + BUG_ON(new->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = this->saved_addr; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); +} + +/****************************************************************************/ +static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port; + + /* Set expectations for NAT */ + exp->saved_addr = exp->tuple.dst.u3; + exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip; + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = ip_nat_callforwarding_expect; + exp->dir = !dir; + + nated_port = nf_nat_exp_find_port(exp, ntohs(port)); + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h225_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port))) { + nf_ct_unexpect_related(exp); + return -1; + } + + /* Success */ + pr_debug("nf_nat_q931: expect Call Forwarding %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +static struct nf_ct_helper_expectfn q931_nat = { + .name = "Q.931", + .expectfn = ip_nat_q931_expect, +}; + +static struct nf_ct_helper_expectfn callforwarding_nat = { + .name = "callforwarding", + .expectfn = ip_nat_callforwarding_expect, +}; + +static const struct nfct_h323_nat_hooks nathooks = { + .set_h245_addr = set_h245_addr, + .set_h225_addr = set_h225_addr, + .set_sig_addr = set_sig_addr, + .set_ras_addr = set_ras_addr, + .nat_rtp_rtcp = nat_rtp_rtcp, + .nat_t120 = nat_t120, + .nat_h245 = nat_h245, + .nat_callforwarding = nat_callforwarding, + .nat_q931 = nat_q931, +}; + +/****************************************************************************/ +static int __init nf_nat_h323_init(void) +{ + RCU_INIT_POINTER(nfct_h323_nat_hook, &nathooks); + nf_ct_helper_expectfn_register(&q931_nat); + nf_ct_helper_expectfn_register(&callforwarding_nat); + return 0; +} + +/****************************************************************************/ +static void __exit nf_nat_h323_fini(void) +{ + RCU_INIT_POINTER(nfct_h323_nat_hook, NULL); + nf_ct_helper_expectfn_unregister(&q931_nat); + nf_ct_helper_expectfn_unregister(&callforwarding_nat); + synchronize_rcu(); +} + +/****************************************************************************/ +module_init(nf_nat_h323_init); +module_exit(nf_nat_h323_fini); + +MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); +MODULE_DESCRIPTION("H.323 NAT helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_NAT_HELPER("h323"); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c new file mode 100644 index 0000000000..fab357cc85 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * nf_nat_pptp.c + * + * NAT support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + * + * TODO: - NAT to a unique tuple, not to TCP source port + * (needs netfilter tuple reservation) + */ + +#include <linux/module.h> +#include <linux/tcp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_zones.h> +#include <linux/netfilter/nf_conntrack_proto_gre.h> +#include <linux/netfilter/nf_conntrack_pptp.h> + +#define NF_NAT_PPTP_VERSION "3.0" + +#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off))) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); +MODULE_ALIAS_NF_NAT_HELPER("pptp"); + +static void pptp_nat_expected(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct net *net = nf_ct_net(ct); + const struct nf_conn *master = ct->master; + struct nf_conntrack_expect *other_exp; + struct nf_conntrack_tuple t = {}; + const struct nf_ct_pptp_master *ct_pptp_info; + const struct nf_nat_pptp *nat_pptp_info; + struct nf_nat_range2 range; + struct nf_conn_nat *nat; + + nat = nf_ct_nat_ext_add(ct); + if (WARN_ON_ONCE(!nat)) + return; + + nat_pptp_info = &nat->help.nat_pptp_info; + ct_pptp_info = nfct_help_data(master); + + /* And here goes the grand finale of corrosion... */ + if (exp->dir == IP_CT_DIR_ORIGINAL) { + pr_debug("we are PNS->PAC\n"); + /* therefore, build tuple for PAC->PNS */ + t.src.l3num = AF_INET; + t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; + t.src.u.gre.key = ct_pptp_info->pac_call_id; + t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; + t.dst.u.gre.key = ct_pptp_info->pns_call_id; + t.dst.protonum = IPPROTO_GRE; + } else { + pr_debug("we are PAC->PNS\n"); + /* build tuple for PNS->PAC */ + t.src.l3num = AF_INET; + t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; + t.src.u.gre.key = nat_pptp_info->pns_call_id; + t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; + t.dst.u.gre.key = nat_pptp_info->pac_call_id; + t.dst.protonum = IPPROTO_GRE; + } + + pr_debug("trying to unexpect other dir: "); + nf_ct_dump_tuple_ip(&t); + other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); + if (other_exp) { + nf_ct_unexpect_related(other_exp); + nf_ct_expect_put(other_exp); + pr_debug("success\n"); + } else { + pr_debug("not found!\n"); + } + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + if (exp->dir == IP_CT_DIR_ORIGINAL) { + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto = range.max_proto = exp->saved_proto; + } + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.src.u3; + if (exp->dir == IP_CT_DIR_REPLY) { + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto = range.max_proto = exp->saved_proto; + } + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); +} + +/* outbound packets == from PNS to PAC */ +static int +pptp_outbound_pkt(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) + +{ + struct nf_ct_pptp_master *ct_pptp_info; + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_pptp *nat_pptp_info; + u_int16_t msg; + __be16 new_callid; + unsigned int cid_off; + + if (WARN_ON_ONCE(!nat)) + return NF_DROP; + + nat_pptp_info = &nat->help.nat_pptp_info; + ct_pptp_info = nfct_help_data(ct); + + new_callid = ct_pptp_info->pns_call_id; + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = new_callid; + break; + case PPTP_IN_CALL_REPLY: + cid_off = offsetof(union pptp_ctrl_union, icack.callID); + break; + case PPTP_CALL_CLEAR_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); + break; + default: + pr_debug("unknown outbound packet 0x%04x:%s\n", msg, + pptp_msg_name(msg)); + fallthrough; + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass + * down to here */ + pr_debug("altering call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); + + /* mangle packet */ + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + cid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_callid), (char *)&new_callid, + sizeof(new_callid))) + return NF_DROP; + return NF_ACCEPT; +} + +static void +pptp_exp_gre(struct nf_conntrack_expect *expect_orig, + struct nf_conntrack_expect *expect_reply) +{ + const struct nf_conn *ct = expect_orig->master; + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_ct_pptp_master *ct_pptp_info; + struct nf_nat_pptp *nat_pptp_info; + + if (WARN_ON_ONCE(!nat)) + return; + + nat_pptp_info = &nat->help.nat_pptp_info; + ct_pptp_info = nfct_help_data(ct); + + /* save original PAC call ID in nat_info */ + nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + + /* alter expectation for PNS->PAC direction */ + expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; + expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; + expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; + expect_orig->dir = IP_CT_DIR_ORIGINAL; + + /* alter expectation for PAC->PNS direction */ + expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; + expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; + expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; + expect_reply->dir = IP_CT_DIR_REPLY; +} + +/* inbound packets == from PAC to PNS */ +static int +pptp_inbound_pkt(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) +{ + const struct nf_nat_pptp *nat_pptp_info; + struct nf_conn_nat *nat = nfct_nat(ct); + u_int16_t msg; + __be16 new_pcid; + unsigned int pcid_off; + + if (WARN_ON_ONCE(!nat)) + return NF_DROP; + + nat_pptp_info = &nat->help.nat_pptp_info; + new_pcid = nat_pptp_info->pns_call_id; + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REPLY: + pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); + break; + case PPTP_IN_CALL_CONNECT: + pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); + break; + case PPTP_IN_CALL_REQUEST: + /* only need to nat in case PAC is behind NAT box */ + return NF_ACCEPT; + case PPTP_WAN_ERROR_NOTIFY: + pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID); + break; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid_off = offsetof(union pptp_ctrl_union, disc.callID); + break; + case PPTP_SET_LINK_INFO: + pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID); + break; + default: + pr_debug("unknown inbound packet %s\n", pptp_msg_name(msg)); + fallthrough; + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, + * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ + + /* mangle packet */ + pr_debug("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); + + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + pcid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid))) + return NF_DROP; + return NF_ACCEPT; +} + +static const struct nf_nat_pptp_hook pptp_hooks = { + .outbound = pptp_outbound_pkt, + .inbound = pptp_inbound_pkt, + .exp_gre = pptp_exp_gre, + .expectfn = pptp_nat_expected, +}; + +static int __init nf_nat_helper_pptp_init(void) +{ + WARN_ON(nf_nat_pptp_hook != NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook, &pptp_hooks); + + return 0; +} + +static void __exit nf_nat_helper_pptp_fini(void) +{ + RCU_INIT_POINTER(nf_nat_pptp_hook, NULL); + synchronize_rcu(); +} + +module_init(nf_nat_helper_pptp_init); +module_exit(nf_nat_helper_pptp_fini); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 new file mode 100644 index 0000000000..24b73268f3 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 @@ -0,0 +1,177 @@ +Message ::= + SEQUENCE { + version + INTEGER ({snmp_version}), + + community + OCTET STRING, + + pdu + PDUs + } + + +ObjectName ::= + OBJECT IDENTIFIER + +ObjectSyntax ::= + CHOICE { + simple + SimpleSyntax, + + application-wide + ApplicationSyntax + } + +SimpleSyntax ::= + CHOICE { + integer-value + INTEGER, + + string-value + OCTET STRING, + + objectID-value + OBJECT IDENTIFIER + } + +ApplicationSyntax ::= + CHOICE { + ipAddress-value + IpAddress, + + counter-value + Counter32, + + timeticks-value + TimeTicks, + + arbitrary-value + Opaque, + + big-counter-value + Counter64, + + unsigned-integer-value + Unsigned32 + } + +IpAddress ::= + [APPLICATION 0] + IMPLICIT OCTET STRING OPTIONAL ({snmp_helper}) + +Counter32 ::= + [APPLICATION 1] + IMPLICIT INTEGER OPTIONAL + +Unsigned32 ::= + [APPLICATION 2] + IMPLICIT INTEGER OPTIONAL + +Gauge32 ::= Unsigned32 OPTIONAL + +TimeTicks ::= + [APPLICATION 3] + IMPLICIT INTEGER OPTIONAL + +Opaque ::= + [APPLICATION 4] + IMPLICIT OCTET STRING OPTIONAL + +Counter64 ::= + [APPLICATION 6] + IMPLICIT INTEGER OPTIONAL + +PDUs ::= + CHOICE { + get-request + GetRequest-PDU, + + get-next-request + GetNextRequest-PDU, + + get-bulk-request + GetBulkRequest-PDU, + + response + Response-PDU, + + set-request + SetRequest-PDU, + + inform-request + InformRequest-PDU, + + snmpV2-trap + SNMPv2-Trap-PDU, + + report + Report-PDU + } + +GetRequest-PDU ::= + [0] IMPLICIT PDU OPTIONAL + +GetNextRequest-PDU ::= + [1] IMPLICIT PDU OPTIONAL + +Response-PDU ::= + [2] IMPLICIT PDU OPTIONAL + +SetRequest-PDU ::= + [3] IMPLICIT PDU OPTIONAL + +-- [4] is obsolete + +GetBulkRequest-PDU ::= + [5] IMPLICIT PDU OPTIONAL + +InformRequest-PDU ::= + [6] IMPLICIT PDU OPTIONAL + +SNMPv2-Trap-PDU ::= + [7] IMPLICIT PDU OPTIONAL + +Report-PDU ::= + [8] IMPLICIT PDU OPTIONAL + +PDU ::= + SEQUENCE { + request-id + INTEGER, + + error-status + INTEGER, + + error-index + INTEGER, + + variable-bindings + VarBindList + } + + +VarBind ::= + SEQUENCE { + name + ObjectName, + + CHOICE { + value + ObjectSyntax, + + unSpecified + NULL, + + noSuchObject + [0] IMPLICIT NULL, + + noSuchInstance + [1] IMPLICIT NULL, + + endOfMibView + [2] IMPLICIT NULL + } +} + +VarBindList ::= SEQUENCE OF VarBind diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c new file mode 100644 index 0000000000..717b726504 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * nf_nat_snmp_basic.c + * + * Basic SNMP Application Layer Gateway + * + * This IP NAT module is intended for use with SNMP network + * discovery and monitoring applications where target networks use + * conflicting private address realms. + * + * Static NAT is used to remap the networks from the view of the network + * management system at the IP layer, and this module remaps some application + * layer addresses to match. + * + * The simplest form of ALG is performed, where only tagged IP addresses + * are modified. The module does not need to be MIB aware and only scans + * messages at the ASN.1/BER level. + * + * Currently, only SNMPv1 and SNMPv2 are supported. + * + * More information on ALG and associated issues can be found in + * RFC 2962 + * + * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory + * McLean & Jochen Friedrich, stripped down for use in the kernel. + * + * Copyright (c) 2000 RP Internet (www.rpi.net.au). + * + * Author: James Morris <jmorris@intercode.com.au> + * + * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net> + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <net/checksum.h> +#include <net/udp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter/nf_conntrack_snmp.h> +#include "nf_nat_snmp_basic.asn1.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); +MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); +MODULE_ALIAS("ip_nat_snmp_basic"); +MODULE_ALIAS_NFCT_HELPER("snmp_trap"); + +#define SNMP_PORT 161 +#define SNMP_TRAP_PORT 162 + +static DEFINE_SPINLOCK(snmp_lock); + +struct snmp_ctx { + unsigned char *begin; + __sum16 *check; + __be32 from; + __be32 to; +}; + +static void fast_csum(struct snmp_ctx *ctx, unsigned char offset) +{ + unsigned char s[12] = {0,}; + int size; + + if (offset & 1) { + memcpy(&s[1], &ctx->from, 4); + memcpy(&s[7], &ctx->to, 4); + s[0] = ~0; + s[1] = ~s[1]; + s[2] = ~s[2]; + s[3] = ~s[3]; + s[4] = ~s[4]; + s[5] = ~0; + size = 12; + } else { + memcpy(&s[0], &ctx->from, 4); + memcpy(&s[4], &ctx->to, 4); + s[0] = ~s[0]; + s[1] = ~s[1]; + s[2] = ~s[2]; + s[3] = ~s[3]; + size = 8; + } + *ctx->check = csum_fold(csum_partial(s, size, + ~csum_unfold(*ctx->check))); +} + +int snmp_version(void *context, size_t hdrlen, unsigned char tag, + const void *data, size_t datalen) +{ + if (datalen != 1) + return -EINVAL; + if (*(unsigned char *)data > 1) + return -ENOTSUPP; + return 1; +} + +int snmp_helper(void *context, size_t hdrlen, unsigned char tag, + const void *data, size_t datalen) +{ + struct snmp_ctx *ctx = (struct snmp_ctx *)context; + __be32 *pdata; + + if (datalen != 4) + return -EINVAL; + pdata = (__be32 *)data; + if (*pdata == ctx->from) { + pr_debug("%s: %pI4 to %pI4\n", __func__, + (void *)&ctx->from, (void *)&ctx->to); + + if (*ctx->check) + fast_csum(ctx, (unsigned char *)data - ctx->begin); + *pdata = ctx->to; + } + + return 1; +} + +static int snmp_translate(struct nf_conn *ct, int dir, struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); + u16 datalen = ntohs(udph->len) - sizeof(struct udphdr); + char *data = (unsigned char *)udph + sizeof(struct udphdr); + struct snmp_ctx ctx; + int ret; + + if (dir == IP_CT_DIR_ORIGINAL) { + ctx.from = ct->tuplehash[dir].tuple.src.u3.ip; + ctx.to = ct->tuplehash[!dir].tuple.dst.u3.ip; + } else { + ctx.from = ct->tuplehash[!dir].tuple.src.u3.ip; + ctx.to = ct->tuplehash[dir].tuple.dst.u3.ip; + } + + if (ctx.from == ctx.to) + return NF_ACCEPT; + + ctx.begin = (unsigned char *)udph + sizeof(struct udphdr); + ctx.check = &udph->check; + ret = asn1_ber_decoder(&nf_nat_snmp_basic_decoder, &ctx, data, datalen); + if (ret < 0) { + nf_ct_helper_log(skb, ct, "parser failed\n"); + return NF_DROP; + } + + return NF_ACCEPT; +} + +/* We don't actually set up expectations, just adjust internal IP + * addresses if this is being NATted + */ +static int help(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + int dir = CTINFO2DIR(ctinfo); + unsigned int ret; + const struct iphdr *iph = ip_hdr(skb); + const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); + + /* SNMP replies and originating SNMP traps get mangled */ + if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) + return NF_ACCEPT; + if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + /* No NAT? */ + if (!(ct->status & IPS_NAT_MASK)) + return NF_ACCEPT; + + /* Make sure the packet length is ok. So far, we were only guaranteed + * to have a valid length IP header plus 8 bytes, which means we have + * enough room for a UDP header. Just verify the UDP length field so we + * can mess around with the payload. + */ + if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { + nf_ct_helper_log(skb, ct, "dropping malformed packet\n"); + return NF_DROP; + } + + if (skb_ensure_writable(skb, skb->len)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); + return NF_DROP; + } + + spin_lock_bh(&snmp_lock); + ret = snmp_translate(ct, dir, skb); + spin_unlock_bh(&snmp_lock); + return ret; +} + +static const struct nf_conntrack_expect_policy snmp_exp_policy = { + .max_expected = 0, + .timeout = 180, +}; + +static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { + .me = THIS_MODULE, + .help = help, + .expect_policy = &snmp_exp_policy, + .name = "snmp_trap", + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT), + .tuple.dst.protonum = IPPROTO_UDP, +}; + +static int __init nf_nat_snmp_basic_init(void) +{ + BUG_ON(nf_nat_snmp_hook != NULL); + RCU_INIT_POINTER(nf_nat_snmp_hook, help); + + return nf_conntrack_helper_register(&snmp_trap_helper); +} + +static void __exit nf_nat_snmp_basic_fini(void) +{ + RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); + nf_conntrack_helper_unregister(&snmp_trap_helper); +} + +module_init(nf_nat_snmp_basic_init); +module_exit(nf_nat_snmp_basic_fini); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c new file mode 100644 index 0000000000..fc761915c5 --- /dev/null +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + */ + +#include <linux/module.h> +#include <net/ip.h> +#include <net/tcp.h> +#include <net/route.h> +#include <net/dst.h> +#include <net/netfilter/ipv4/nf_reject.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_bridge.h> + +static int nf_reject_iphdr_validate(struct sk_buff *skb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (skb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(skb, iph->ihl*4)) + return 0; + + return 1; +} + +struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) +{ + const struct tcphdr *oth; + struct sk_buff *nskb; + struct iphdr *niph; + struct tcphdr _oth; + + if (!nf_reject_iphdr_validate(oldskb)) + return NULL; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return NULL; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return NULL; + + nskb->dev = (struct net_device *)dev; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + return nskb; +} +EXPORT_SYMBOL_GPL(nf_reject_skb_v4_tcp_reset); + +struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) +{ + struct sk_buff *nskb; + struct iphdr *niph; + struct icmphdr *icmph; + unsigned int len; + int dataoff; + __wsum csum; + u8 proto; + + if (!nf_reject_iphdr_validate(oldskb)) + return NULL; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return NULL; + + /* RFC says return as much as we can without exceeding 576 bytes. */ + len = min_t(unsigned int, 536, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return NULL; + + if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len))) + return NULL; + + dataoff = ip_hdrlen(oldskb); + proto = ip_hdr(oldskb)->protocol; + + if (!skb_csum_unnecessary(oldskb) && + nf_reject_verify_csum(oldskb, dataoff, proto) && + nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) + return NULL; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return NULL; + + nskb->dev = (struct net_device *)dev; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); + + skb_reset_transport_header(nskb); + icmph = skb_put_zero(nskb, sizeof(struct icmphdr)); + icmph->type = ICMP_DEST_UNREACH; + icmph->code = code; + + skb_put_data(nskb, skb_network_header(oldskb), len); + + csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0); + icmph->checksum = csum_fold(csum); + + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + return nskb; +} +EXPORT_SYMBOL_GPL(nf_reject_skb_v4_unreach); + +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook) +{ + const struct tcphdr *oth; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return NULL; + + if (ip_hdr(oldskb)->protocol != IPPROTO_TCP) + return NULL; + + oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), + sizeof(struct tcphdr), _oth); + if (oth == NULL) + return NULL; + + /* No RST for RST. */ + if (oth->rst) + return NULL; + + /* Check checksum */ + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) + return NULL; + + return oth; +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get); + +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __u8 protocol, int ttl) +{ + struct iphdr *niph, *oiph = ip_hdr(oldskb); + + skb_reset_network_header(nskb); + niph = skb_put(nskb, sizeof(struct iphdr)); + niph->version = 4; + niph->ihl = sizeof(struct iphdr) / 4; + niph->tos = 0; + niph->id = 0; + niph->frag_off = htons(IP_DF); + niph->protocol = protocol; + niph->check = 0; + niph->saddr = oiph->daddr; + niph->daddr = oiph->saddr; + niph->ttl = ttl; + + nskb->protocol = htons(ETH_P_IP); + + return niph; +} +EXPORT_SYMBOL_GPL(nf_reject_iphdr_put); + +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth) +{ + struct iphdr *niph = ip_hdr(nskb); + struct tcphdr *tcph; + + skb_reset_transport_header(nskb); + tcph = skb_put_zero(nskb, sizeof(struct tcphdr)); + tcph->source = oth->dest; + tcph->dest = oth->source; + tcph->doff = sizeof(struct tcphdr) / 4; + + if (oth->ack) { + tcph->seq = oth->ack_seq; + } else { + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + oldskb->len - ip_hdrlen(oldskb) - + (oth->doff << 2)); + tcph->ack = 1; + } + + tcph->rst = 1; + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, + niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)tcph - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); + +static int nf_reject_fill_skb_dst(struct sk_buff *skb_in) +{ + struct dst_entry *dst = NULL; + struct flowi fl; + + memset(&fl, 0, sizeof(struct flowi)); + fl.u.ip4.daddr = ip_hdr(skb_in)->saddr; + nf_ip_route(dev_net(skb_in->dev), &dst, &fl, false); + if (!dst) + return -1; + + skb_dst_set(skb_in, dst); + return 0; +} + +/* Send RST reply */ +void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, + int hook) +{ + struct sk_buff *nskb; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) && + nf_reject_fill_skb_dst(oldskb) < 0) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + /* ip_route_me_harder expects skb->dst to be set */ + skb_dst_set_noref(nskb, skb_dst(oldskb)); + + nskb->mark = IP4_REPLY_MARK(net, oldskb->mark); + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + ip4_dst_hoplimit(skb_dst(nskb))); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + if (ip_route_me_harder(net, sk, nskb, RTN_UNSPEC)) + goto free_nskb; + + niph = ip_hdr(nskb); + + /* "Never happens" */ + if (nskb->len > dst_mtu(skb_dst(nskb))) + goto free_nskb; + + nf_ct_attach(nskb, oldskb); + nf_ct_set_closing(skb_nfct(oldskb)); + +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + /* If we use ip_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (nf_bridge_info_exists(oldskb)) { + struct ethhdr *oeth = eth_hdr(oldskb); + struct net_device *br_indev; + + br_indev = nf_bridge_get_physindev(oldskb, net); + if (!br_indev) + goto free_nskb; + + nskb->dev = br_indev; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + goto free_nskb; + dev_queue_xmit(nskb); + } else +#endif + ip_local_out(net, nskb->sk, nskb); + + return; + + free_nskb: + kfree_skb(nskb); +} +EXPORT_SYMBOL_GPL(nf_send_reset); + +void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) +{ + struct iphdr *iph = ip_hdr(skb_in); + int dataoff = ip_hdrlen(skb_in); + u8 proto = iph->protocol; + + if (iph->frag_off & htons(IP_OFFSET)) + return; + + if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) && + nf_reject_fill_skb_dst(skb_in) < 0) + return; + + if (skb_csum_unnecessary(skb_in) || + !nf_reject_verify_csum(skb_in, dataoff, proto)) { + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); + return; + } + + if (nf_ip_checksum(skb_in, hook, dataoff, proto) == 0) + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); +} +EXPORT_SYMBOL_GPL(nf_send_unreach); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c new file mode 100644 index 0000000000..a1350fc258 --- /dev/null +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/icmp.h> +#include <net/sock.h> +#include <net/inet_sock.h> +#include <net/netfilter/nf_socket.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + +static int +extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, + __be32 *raddr, __be32 *laddr, + __be16 *rport, __be16 *lport) +{ + unsigned int outside_hdrlen = ip_hdrlen(skb); + struct iphdr *inside_iph, _inside_iph; + struct icmphdr *icmph, _icmph; + __be16 *ports, _ports[2]; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + if (!icmp_is_err(icmph->type)) + return 1; + + inside_iph = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr), + sizeof(_inside_iph), &_inside_iph); + if (inside_iph == NULL) + return 1; + + if (inside_iph->protocol != IPPROTO_TCP && + inside_iph->protocol != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr) + + (inside_iph->ihl << 2), + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_iph->protocol; + *laddr = inside_iph->saddr; + *lport = ports[0]; + *raddr = inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + +static struct sock * +nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, + const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return inet_lookup(net, net->ipv4.tcp_death_row.hashinfo, + skb, doff, saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + return NULL; +} + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev) +{ + __be32 daddr, saddr; + __be16 dport, sport; + const struct iphdr *iph = ip_hdr(skb); + struct sk_buff *data_skb = NULL; + u8 protocol; +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn const *ct; +#endif + int doff = 0; + + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { + struct tcphdr _hdr; + struct udphdr *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), + iph->protocol == IPPROTO_UDP ? + sizeof(*hp) : sizeof(_hdr), &_hdr); + if (hp == NULL) + return NULL; + + protocol = iph->protocol; + saddr = iph->saddr; + sport = hp->source; + daddr = iph->daddr; + dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = iph->protocol == IPPROTO_TCP ? + ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : + ip_hdrlen(skb) + sizeof(*hp); + + } else if (iph->protocol == IPPROTO_ICMP) { + if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, + &sport, &dport)) + return NULL; + } else { + return NULL; + } + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + /* Do the lookup with the original socket address in + * case this is a reply packet of an established + * SNAT-ted connection. + */ + ct = nf_ct_get(skb, &ctinfo); + if (ct && + ((iph->protocol != IPPROTO_ICMP && + ctinfo == IP_CT_ESTABLISHED_REPLY) || + (iph->protocol == IPPROTO_ICMP && + ctinfo == IP_CT_RELATED_REPLY)) && + (ct->status & IPS_SRC_NAT_DONE)) { + + daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + dport = (iph->protocol == IPPROTO_TCP) ? + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + } +#endif + + return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, + daddr, sport, dport, indev); +} +EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure"); diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c new file mode 100644 index 0000000000..69e3317996 --- /dev/null +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + */ + +#include <net/netfilter/nf_tproxy.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/inet_sock.h> +#include <linux/ip.h> +#include <net/checksum.h> +#include <net/udp.h> +#include <net/tcp.h> +#include <linux/inetdevice.h> + +struct sock * +nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, + __be32 laddr, __be16 lport, struct sock *sk) +{ + const struct iphdr *iph = ip_hdr(skb); + struct tcphdr _hdr, *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); + if (hp == NULL) { + inet_twsk_put(inet_twsk(sk)); + return NULL; + } + + if (hp->syn && !hp->rst && !hp->ack && !hp->fin) { + /* SYN to a TIME_WAIT socket, we'd rather redirect it + * to a listener socket if there's one */ + struct sock *sk2; + + sk2 = nf_tproxy_get_sock_v4(net, skb, iph->protocol, + iph->saddr, laddr ? laddr : iph->daddr, + hp->source, lport ? lport : hp->dest, + skb->dev, NF_TPROXY_LOOKUP_LISTENER); + if (sk2) { + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); + sk = sk2; + } + } + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait4); + +__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) +{ + const struct in_ifaddr *ifa; + struct in_device *indev; + __be32 laddr; + + if (user_laddr) + return user_laddr; + + laddr = 0; + indev = __in_dev_get_rcu(skb->dev); + + in_dev_for_each_ifa_rcu(ifa, indev) { + if (ifa->ifa_flags & IFA_F_SECONDARY) + continue; + + laddr = ifa->ifa_local; + break; + } + + return laddr ? laddr : daddr; +} +EXPORT_SYMBOL_GPL(nf_tproxy_laddr4); + +struct sock * +nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, + const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type) +{ + struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; + struct sock *sk; + + switch (protocol) { + case IPPROTO_TCP: { + struct tcphdr _hdr, *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(struct tcphdr), &_hdr); + if (hp == NULL) + return NULL; + + switch (lookup_type) { + case NF_TPROXY_LOOKUP_LISTENER: + sk = inet_lookup_listener(net, hinfo, skb, + ip_hdrlen(skb) + __tcp_hdrlen(hp), + saddr, sport, daddr, dport, + in->ifindex, 0); + + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too + */ + break; + case NF_TPROXY_LOOKUP_ESTABLISHED: + sk = inet_lookup_established(net, hinfo, saddr, sport, + daddr, dport, in->ifindex); + break; + default: + BUG(); + } + break; + } + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too + */ + if ((lookup_type == NF_TPROXY_LOOKUP_ESTABLISHED && + (!connected || wildcard)) || + (lookup_type == NF_TPROXY_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs"); +MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support"); diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c new file mode 100644 index 0000000000..a522c3a3be --- /dev/null +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org> + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/ipv4/nf_dup_ipv4.h> + +struct nft_dup_ipv4 { + u8 sreg_addr; + u8 sreg_dev; +}; + +static void nft_dup_ipv4_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_dup_ipv4 *priv = nft_expr_priv(expr); + struct in_addr gw = { + .s_addr = (__force __be32)regs->data[priv->sreg_addr], + }; + int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; + + nf_dup_ipv4(nft_net(pkt), pkt->skb, nft_hook(pkt), &gw, oif); +} + +static int nft_dup_ipv4_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dup_ipv4 *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_DUP_SREG_ADDR] == NULL) + return -EINVAL; + + err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, + sizeof(struct in_addr)); + if (err < 0) + return err; + + if (tb[NFTA_DUP_SREG_DEV]) + err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], + &priv->sreg_dev, sizeof(int)); + + return err; +} + +static int nft_dup_ipv4_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) +{ + struct nft_dup_ipv4 *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr)) + goto nla_put_failure; + if (priv->sreg_dev && + nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dup_ipv4_type; +static const struct nft_expr_ops nft_dup_ipv4_ops = { + .type = &nft_dup_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv4)), + .eval = nft_dup_ipv4_eval, + .init = nft_dup_ipv4_init, + .dump = nft_dup_ipv4_dump, + .reduce = NFT_REDUCE_READONLY, +}; + +static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = { + [NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 }, + [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 }, +}; + +static struct nft_expr_type nft_dup_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "dup", + .ops = &nft_dup_ipv4_ops, + .policy = nft_dup_ipv4_policy, + .maxattr = NFTA_DUP_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_dup_ipv4_module_init(void) +{ + return nft_register_expr(&nft_dup_ipv4_type); +} + +static void __exit nft_dup_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_dup_ipv4_type); +} + +module_init(nft_dup_ipv4_module_init); +module_exit(nft_dup_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup"); +MODULE_DESCRIPTION("IPv4 nftables packet duplication support"); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c new file mode 100644 index 0000000000..9eee535c64 --- /dev/null +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_fib.h> + +#include <net/ip_fib.h> +#include <net/route.h> + +/* don't try to find route from mcast/bcast/zeronet */ +static __be32 get_saddr(__be32 addr) +{ + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || + ipv4_is_zeronet(addr)) + return 0; + return addr; +} + +#define DSCP_BITS 0xfc + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + int noff = skb_network_offset(pkt->skb); + u32 *dst = ®s->data[priv->dreg]; + const struct net_device *dev = NULL; + struct iphdr *iph, _iph; + __be32 addr; + + if (priv->flags & NFTA_FIB_F_IIF) + dev = nft_in(pkt); + else if (priv->flags & NFTA_FIB_F_OIF) + dev = nft_out(pkt); + + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); + if (!iph) { + regs->verdict.code = NFT_BREAK; + return; + } + + if (priv->flags & NFTA_FIB_F_DADDR) + addr = iph->daddr; + else + addr = iph->saddr; + + *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); +} +EXPORT_SYMBOL_GPL(nft_fib4_eval_type); + +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + int noff = skb_network_offset(pkt->skb); + u32 *dest = ®s->data[priv->dreg]; + struct iphdr *iph, _iph; + struct fib_result res; + struct flowi4 fl4 = { + .flowi4_scope = RT_SCOPE_UNIVERSE, + .flowi4_iif = LOOPBACK_IFINDEX, + .flowi4_uid = sock_net_uid(nft_net(pkt), NULL), + }; + const struct net_device *oif; + const struct net_device *found; + + /* + * Do not set flowi4_oif, it restricts results (for example, asking + * for oif 3 will get RTN_UNICAST result even if the daddr exits + * on another interface. + * + * Search results for the desired outinterface instead. + */ + if (priv->flags & NFTA_FIB_F_OIF) + oif = nft_out(pkt); + else if (priv->flags & NFTA_FIB_F_IIF) + oif = nft_in(pkt); + else + oif = NULL; + + if (priv->flags & NFTA_FIB_F_IIF) + fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(oif); + + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && + nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { + nft_fib_store_result(dest, priv, nft_in(pkt)); + return; + } + + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); + if (!iph) { + regs->verdict.code = NFT_BREAK; + return; + } + + if (ipv4_is_zeronet(iph->saddr)) { + if (ipv4_is_lbcast(iph->daddr) || + ipv4_is_local_multicast(iph->daddr)) { + nft_fib_store_result(dest, priv, pkt->skb->dev); + return; + } + } + + if (priv->flags & NFTA_FIB_F_MARK) + fl4.flowi4_mark = pkt->skb->mark; + + fl4.flowi4_tos = iph->tos & DSCP_BITS; + + if (priv->flags & NFTA_FIB_F_DADDR) { + fl4.daddr = iph->daddr; + fl4.saddr = get_saddr(iph->saddr); + } else { + if (nft_hook(pkt) == NF_INET_FORWARD && + priv->flags & NFTA_FIB_F_IIF) + fl4.flowi4_iif = nft_out(pkt)->ifindex; + + fl4.daddr = iph->saddr; + fl4.saddr = get_saddr(iph->daddr); + } + + *dest = 0; + + if (fib_lookup(nft_net(pkt), &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) + return; + + switch (res.type) { + case RTN_UNICAST: + break; + case RTN_LOCAL: /* Should not see RTN_LOCAL here */ + return; + default: + break; + } + + if (!oif) { + found = FIB_RES_DEV(res); + } else { + if (!fib_info_nh_uses_dev(res.fi, oif)) + return; + found = oif; + } + + nft_fib_store_result(dest, priv, found); +} +EXPORT_SYMBOL_GPL(nft_fib4_eval); + +static struct nft_expr_type nft_fib4_type; + +static const struct nft_expr_ops nft_fib4_type_ops = { + .type = &nft_fib4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib4_eval_type, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, + .reduce = nft_fib_reduce, +}; + +static const struct nft_expr_ops nft_fib4_ops = { + .type = &nft_fib4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib4_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, + .reduce = nft_fib_reduce, +}; + +static const struct nft_expr_ops * +nft_fib4_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + enum nft_fib_result result; + + if (!tb[NFTA_FIB_RESULT]) + return ERR_PTR(-EINVAL); + + result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); + + switch (result) { + case NFT_FIB_RESULT_OIF: + return &nft_fib4_ops; + case NFT_FIB_RESULT_OIFNAME: + return &nft_fib4_ops; + case NFT_FIB_RESULT_ADDRTYPE: + return &nft_fib4_type_ops; + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static struct nft_expr_type nft_fib4_type __read_mostly = { + .name = "fib", + .select_ops = nft_fib4_select_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, +}; + +static int __init nft_fib4_module_init(void) +{ + return nft_register_expr(&nft_fib4_type); +} + +static void __exit nft_fib4_module_exit(void) +{ + nft_unregister_expr(&nft_fib4_type); +} + +module_init(nft_fib4_module_init); +module_exit(nft_fib4_module_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_ALIAS_NFT_AF_EXPR(2, "fib"); +MODULE_DESCRIPTION("nftables fib / ip route lookup support"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c new file mode 100644 index 0000000000..6cb213bb72 --- /dev/null +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/ipv4/nf_reject.h> +#include <net/netfilter/nft_reject.h> + +static void nft_reject_ipv4_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach(pkt->skb, priv->icmp_code, nft_hook(pkt)); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset(nft_net(pkt), nft_sk(pkt), pkt->skb, + nft_hook(pkt)); + break; + default: + break; + } + + regs->verdict.code = NF_DROP; +} + +static struct nft_expr_type nft_reject_ipv4_type; +static const struct nft_expr_ops nft_reject_ipv4_ops = { + .type = &nft_reject_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_ipv4_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, + .validate = nft_reject_validate, + .reduce = NFT_REDUCE_READONLY, +}; + +static struct nft_expr_type nft_reject_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "reject", + .ops = &nft_reject_ipv4_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_ipv4_module_init(void) +{ + return nft_register_expr(&nft_reject_ipv4_type); +} + +static void __exit nft_reject_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_reject_ipv4_type); +} + +module_init(nft_reject_ipv4_module_init); +module_exit(nft_reject_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject"); +MODULE_DESCRIPTION("IPv4 packet rejection for nftables"); |